James Cole - December 2019. Built with R version 3.5.2
This is Notebook contains the final brain age analysis of MS patient data and controls from the UCL cohort, the MAGNIMS consortium and the Imperial College London PET study (n=25). The analysis uses brain-predicted age difference (brain-PAD) to look at brain ageing in the context of MS. The brain-PAD values were generated in PRONTO, using an independent healthy (n=2001) training dataset, and the values were corrected for proportional bias using the intercept and slope of the age by brain-predicted age regression in the training dataset.

Set-up

Clear workspace, set colour palette

rm(list = ls()) ## clear workspace
ms.palette <- c("darkgreen", "darkorange", "red", "blue", "purple") # define MS colour scheme for groups
sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS  10.15.4
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] tidyr_1.0.0        survminer_0.4.3    ggpubr_0.2        
##  [4] magrittr_1.5       survival_2.43-3    stringr_1.3.1     
##  [7] sjPlot_2.8.2       sjmisc_2.8.2       scales_1.0.0      
## [10] RColorBrewer_1.1-2 qwraps2_0.4.1      psych_1.8.12      
## [13] pryr_0.1.4         ppcor_1.1          plyr_1.8.4        
## [16] plotrix_3.7-4      metafor_2.0-0      MASS_7.3-51.1     
## [19] lmerTest_3.0-1     lme4_1.1-21        Matrix_1.2-15     
## [22] lm.beta_1.5-1      knitr_1.21         kableExtra_1.1.0  
## [25] jtools_1.1.1       hier.part_1.0-4    gtools_3.8.1      
## [28] gridExtra_2.3      glmmTMB_0.2.3      ggplot2_3.2.1     
## [31] ggstance_0.3.1     emmeans_1.4.2      effects_4.1-4     
## [34] dplyr_0.8.3        data.table_1.11.8  cowplot_1.0.0     
## [37] corrplot_0.84      car_3.0-2          carData_3.0-2     
## 
## loaded via a namespace (and not attached):
##  [1] minqa_1.2.4       colorspace_1.3-2  rio_0.5.16       
##  [4] sjlabelled_1.1.2  estimability_1.3  parameters_0.4.1 
##  [7] rstudioapi_0.10   mvtnorm_1.0-8     xml2_1.2.0       
## [10] codetools_0.2-16  splines_3.5.2     mnormt_1.5-5     
## [13] zeallot_0.1.0     nloptr_1.2.1      ggeffects_0.14.0 
## [16] km.ci_0.5-2       broom_0.5.2       effectsize_0.1.1 
## [19] readr_1.3.1       compiler_3.5.2    httr_1.4.0       
## [22] sjstats_0.17.8    backports_1.1.3   assertthat_0.2.0 
## [25] lazyeval_0.2.1    survey_3.36       cli_1.0.1        
## [28] htmltools_0.3.6   tools_3.5.2       coda_0.19-3      
## [31] gtable_0.2.0      glue_1.3.1        Rcpp_1.0.2       
## [34] cellranger_1.1.0  vctrs_0.2.0       nlme_3.1-137     
## [37] insight_0.8.0     xfun_0.4          openxlsx_4.1.0   
## [40] rvest_0.3.3       lifecycle_0.1.0   zoo_1.8-4        
## [43] hms_0.4.2         parallel_3.5.2    TMB_1.7.15       
## [46] yaml_2.2.0        curl_3.2          KMsurv_0.1-5     
## [49] stringi_1.2.4     bayestestR_0.5.1  boot_1.3-21      
## [52] zip_1.0.0         rlang_0.4.0       pkgconfig_2.0.2  
## [55] evaluate_0.12     lattice_0.20-38   purrr_0.3.2      
## [58] cmprsk_2.2-7      tidyselect_0.2.5  R6_2.3.0         
## [61] generics_0.0.2    DBI_1.0.0         pillar_1.3.1     
## [64] haven_2.0.0       foreign_0.8-71    withr_2.1.2      
## [67] abind_1.4-5       nnet_7.3-12       tibble_2.1.3     
## [70] performance_0.4.3 modelr_0.1.4      crayon_1.3.4     
## [73] survMisc_0.5.5    rmarkdown_1.11    grid_3.5.2       
## [76] readxl_1.2.0      forcats_0.3.0     digest_0.6.18    
## [79] webshot_0.5.1     xtable_1.8-3      numDeriv_2016.8-1
## [82] munsell_0.5.0     viridisLite_0.3.0 mitools_2.4

Get data from CSV and define longitudinal data.frames

df <- read.csv("MS_brain_age_final_long.csv")
df$subtype <- factor(df$subtype, levels = c("control", "CIS", "RRMS", "SPMS", "PPMS")) # reorder subtype factor to put controls first
df$Cohort <- recode(df$Cohort, JR1 = "Imperial", C0 = "UCL0", C1 = "UCL1", C2 = "UCL2", C3 = "UCL3", C4 = "UCL4" ,C5 = "UCL5", C6 = "UCL6", C7 = "UCL7", A = "Amsterdam", B = "Barcelona", G = "Graz", M = "Milan", R = "Rome", S = "Siena")
df$wb_vol_ratio_icv <- df$WBV / df$ICV

Get data on Brain Age Healthy Training dataset

banc <- read.csv("/Users/jcole/Work/Brain ageing/BANC_2015/BANC_N2003_age_sex_Brain_age_predictions.csv")

Exclude participants with errors in the database & correct time since diagnosis errors

tmp <- df[df$Cohort == 'Amsterdam',] %>% group_by(PatientID) %>% dplyr::summarize(sd = sd(age_at_scan)) %>% arrange(desc(sd)) %>% filter(sd > 2)
excluded_IDs <- sort(tmp$PatientID)
# excluded_IDs <- unique(df[which(df$age_at_scan < df$age_at_baseline_scan1),1])
excluded_scans <- (df %>% filter(str_detect(PatientID, paste(excluded_IDs, collapse = "|"))))["ScanName"]
df <- df %>%
  filter(!str_detect(PatientID, paste(excluded_IDs, collapse = "|")))
rm(tmp)

There were 13 subjects with 38 scans excluded in total. Data entry errors in original spreadsheet; age at baseline not consistent within subject.

Load data for lesion filling analysis

## read in data
lesion_df <- read.csv(file = '~/Work/Brain ageing/Collaborations/MS/MAGNIMS/MAGNIMS20171224_final.csv')
lesion_df$subtype <- factor(lesion_df$subtype, levels = c("control", "CIS", "RRMS", "SPMS", "PPMS"))
## create ICV ratio variables
lesion_df <- lesion_df %>%
  mutate(ICV = GMV + WMV + CSFV) %>%
  mutate(gm_icv_ratio = GMV/ICV) %>%
  mutate(wm_icv_ratio = WMV/ICV)

Recode scanner status variable to field strength

Pre-post 2004 data only available for one site. Recode to 1.5T or 3T.

table(df$scanner_status)  %>%
  kable(col.names = c("Scanner status", "Frequency")) %>%
  kable_styling(full_width = F, position = "left")
Scanner status Frequency
1.5T 2257
1.5T_post_2004 19
1.5T_pre_2004 447
3T 842
df$field_strength <- recode(df$scanner_status, `1.5T_pre_2004` = "1.5T", `1.5T_post_2004` = "1.5T")
table(df$field_strength) %>%
  kable(col.names = c("Field strength", "Frequency")) %>%
  kable_styling(full_width = F, position = "left")
Field strength Frequency
1.5T 2723
3T 842

Generate baseline only data.frame and show data frame structure

df.bl <- df[(df$age_at_baseline_scan1 == df$age_at_scan),] # define baseline data.frame
str(df)
## 'data.frame':    3565 obs. of  34 variables:
##  $ PatientID                         : Factor w/ 1367 levels "AMSTERDAM_4001",..: 1 1 1 2 2 2 3 3 3 4 ...
##  $ Cohort                            : Factor w/ 15 levels "Amsterdam","Barcelona",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ scanner_status                    : Factor w/ 4 levels "1.5T","1.5T_post_2004",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ gender                            : Factor w/ 2 levels "female","male": 2 2 2 1 1 1 1 1 1 1 ...
##  $ control0ppms1cisoforever2other3   : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ control0rest1                     : Factor w/ 2 levels "control","MS": 2 2 2 2 2 2 2 2 2 2 ...
##  $ control0ms1cis2                   : Factor w/ 3 levels "CIS","control",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ age_at_baseline_scan1             : num  57.8 57.8 57.8 44.4 44.4 ...
##  $ disease_duration_at_baseline_scan1: num  6.8 6.8 6.8 6.38 6.38 ...
##  $ DMT_YesNoNA                       : Factor w/ 2 levels "NO","YES": 1 1 1 1 1 1 2 2 2 2 ...
##  $ DMTYes1                           : Factor w/ 2 levels "No treatment",..: 1 1 1 1 1 1 2 2 2 2 ...
##  $ EDSSbaseline                      : num  2.5 2.5 2.5 3 3 3 4 4 4 2.5 ...
##  $ NoScans                           : int  3 3 3 3 3 3 3 3 3 2 ...
##  $ FUTimeMax                         : num  2.32 2.32 2.32 2.35 2.35 ...
##  $ ScanName                          : Factor w/ 3603 levels "AMSTERDAM_4001_baseline_t1",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ age_at_scan                       : num  57.8 59.1 60.1 44.4 45.5 ...
##  $ EDSSatScan                        : num  2.5 3.5 3 3 2 2.5 4 2.5 4 2.5 ...
##  $ BrainAge                          : num  69.6 73.4 71 58.1 61.8 ...
##  $ BrainPAD                          : num  11.8 14.3 10.8 13.7 16.3 ...
##  $ subtype                           : Factor w/ 5 levels "control","CIS",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ disease_onset_age                 : num  51 51 51 38 38 ...
##  $ disease_duration                  : num  6.8 6.8 6.8 6.38 6.38 ...
##  $ interval                          : num  0 1.3 2.32 0 1.16 ...
##  $ scan_number                       : Factor w/ 10 levels "scan1","scan10",..: 1 3 4 1 3 4 1 3 4 1 ...
##  $ GM_vol                            : num  0.574 0.563 0.573 0.622 0.605 0.59 0.647 0.643 0.633 0.648 ...
##  $ WM_vol                            : num  0.536 0.532 0.52 0.43 0.423 0.413 0.404 0.399 0.387 0.599 ...
##  $ CSF_vol                           : num  0.352 0.349 0.367 0.293 0.309 0.333 0.355 0.358 0.386 0.271 ...
##  $ WBV                               : num  1.11 1.09 1.09 1.05 1.03 ...
##  $ ICV                               : num  1.46 1.44 1.46 1.34 1.34 ...
##  $ gm_vol_ratio_icv                  : num  0.393 0.39 0.392 0.462 0.453 ...
##  $ wm_vol_ratio_icv                  : num  0.367 0.368 0.356 0.32 0.316 ...
##  $ csf_vol_ratio_icv                 : num  0.241 0.242 0.251 0.218 0.231 ...
##  $ wb_vol_ratio_icv                  : num  0.759 0.758 0.749 0.782 0.769 ...
##  $ field_strength                    : Factor w/ 2 levels "1.5T","3T": 1 1 1 1 1 1 1 1 1 1 ...

Basic stats

Total number of subjects, and by group

The total number of subjects included was n = 1354

The total number of MS patients (including CIS) was n = 1204 and healthy controls was n = 150

Number of scans in total and per group

Total number of scans = 3565

Number of people with 2 or more scans

df.bl %>%
  filter(NoScans >= 2) %>%
  group_by(control0rest1) %>%
  tally() %>%
  kable(col.names = c("Group", "n")) %>%
  kable_styling(full_width = F, position = "left")
Group n
control 111
MS 1155

Number of people with 3 or more scans

df.bl %>%
  filter(NoScans >= 3) %>%
  group_by(control0rest1) %>%
  tally() %>%
  kable(col.names = c("Group", "n")) %>%
  kable_styling(full_width = F, position = "left")
Group n
control 64
MS 509

Generate demographics table using qwarps2

options(qwraps2_markup = "markdown", digits = 2)

table1 <-
  list("N" =
         list("Control" = ~ qwraps2::n_perc0(control0rest1 == "control", na_rm = T),
              "MS"  = ~ qwraps2::n_perc0(control0rest1 == "MS", na_rm = T)),
       "N with follow-up" = 
         list("Yes" = ~ sum(NoScans>1)),
     "Gender" =
       list("Female" = ~ qwraps2::n_perc0(gender == "female", show_symbol = T),
            "Male"  = ~ qwraps2::n_perc0(gender == "male", show_symbol = T)),
        "Number of scans" =
       list("min" = ~ min(NoScans),
            "max" = ~ max(NoScans),
            "mean (sd)" = ~ qwraps2::mean_sd(NoScans)),
       "Age at baseline scan (years)" =
       list("min" = ~ min(age_at_baseline_scan1),
            "max" = ~ max(age_at_baseline_scan1),
            "mean (sd)" = ~ qwraps2::mean_sd(age_at_baseline_scan1)),
       "Brain-predicted age at baseline scan (years)" =
       list("min" = ~ min(BrainAge),
            "max" = ~ max(BrainAge),
            "mean (sd)" = ~ qwraps2::mean_sd(BrainAge)),
       "Disease duration at baseline (years)" =
       list("min" = ~ min(disease_duration, na.rm = T),
            "max" = ~ max(disease_duration, na.rm = T),
            "mean (sd)" = ~ qwraps2::mean_sd(disease_duration, na_rm = T, show_n = "never")),
        "EDSS at baseline " =
       list("min" = ~ min(EDSSbaseline, na.rm = T),
            "max" = ~ max(EDSSbaseline, na.rm = T),
            "mean (sd)" = ~ qwraps2::mean_sd(EDSSbaseline, na_rm = T, show_n = "never")),
       "MS subtype" =
       list("CIS" = ~ qwraps2::n_perc0(subtype == "CIS", show_symbol = T),
            "RRMS"  = ~ qwraps2::n_perc0(subtype == "RRMS", show_symbol = T),
            "SPMS"  = ~ qwraps2::n_perc0(subtype == "SPMS", show_symbol = T),
            "PPMS"  = ~ qwraps2::n_perc0(subtype == "PPMS", show_symbol = T)),
       "Treatment" =
         list("Yes" = ~ qwraps2::n_perc0(DMT_YesNoNA == "YES", na_rm = T, show_symbol = T),
              "No"  = ~ qwraps2::n_perc0(DMT_YesNoNA == "NO", na_rm = T, show_symbol = T),
              "Missing" = ~ sum(is.na(DMT_YesNoNA)))
       )

demo.table <- summary_table(dplyr::group_by(df.bl, control0rest1), table1)
print(demo.table, cnames = c("Controls", "MS patients"), rtitle = "Sample Chararcteristics")
Sample Chararcteristics Controls MS patients
N      
   Control 150 (100) 0 (0)
   MS 0 (0) 1,204 (100)
N with follow-up      
   Yes 111 1155
Gender      
   Female 82 (55%) 771 (64%)
   Male 68 (45%) 433 (36%)
Number of scans      
   min 1 1
   max 10 7
   mean (sd) 2.82 ± 1.90 2.61 ± 1.01
Age at baseline scan (years)      
   min 23 15
   max 66 68
   mean (sd) 37.29 ± 9.96 39.41 ± 10.76
Brain-predicted age at baseline scan (years)      
   min 14.5 7.4
   max 70 92
   mean (sd) 38.43 ± 11.12 50.27 ± 14.90
Disease duration at baseline (years)      
   min Inf 0
   max -Inf 48
   mean (sd) NaN ± NA 7.26 ± 7.96
EDSS at baseline      
   min Inf 0
   max -Inf 9
   mean (sd) NaN ± NA 2.60 ± 1.95
MS subtype      
   CIS 0 (0%) 296 (25%)
   RRMS 0 (0%) 677 (56%)
   SPMS 0 (0%) 111 (9%)
   PPMS 0 (0%) 120 (10%)
Treatment      
   Yes 0 (NaN%) 475 (41%)
   No 0 (NaN%) 675 (59%)
   Missing 150 54

Length of follow-up

Get length of follow-up from longitudinal database.

df %>%
  filter(NoScans >= 2) %>%
  group_by(PatientID) %>%
  slice(which.max(interval)) %>%
  group_by(control0rest1) %>%
  dplyr::summarise(mean(interval), sd(interval), min(interval), max(interval)) %>%
  kable(digits = 2) %>%
  kable_styling(full_width = F, position = "left")
control0rest1 mean(interval) sd(interval) min(interval) max(interval)
control 2.0 1.4 0.50 6
MS 3.4 3.1 0.23 15

Demographics by MS subtype

options(qwraps2_markup = "markdown", digits = 2)

table1 <-
  list("N with follow-up" = 
         list("Yes" = ~ sum(NoScans>1)),
    "Gender" =
       list("Female" = ~ qwraps2::n_perc0(gender == "female", show_symbol = T),
            "Male"  = ~ qwraps2::n_perc0(gender == "male", show_symbol = T)),
        "Number of scans" =
       list("min" = ~ min(NoScans),
            "max" = ~ max(NoScans),
            "mean (sd)" = ~ qwraps2::mean_sd(NoScans)),
       "Age at baseline scan (years)" =
       list("min" = ~ min(age_at_baseline_scan1),
            "max" = ~ max(age_at_baseline_scan1),
            "mean (sd)" = ~ qwraps2::mean_sd(age_at_baseline_scan1)),
       "Brain-predicted age at baseline scan (years)" =
       list("min" = ~ min(BrainAge),
            "max" = ~ max(BrainAge),
            "mean (sd)" = ~ qwraps2::mean_sd(BrainAge)),
       "Disease duration at baseline (years)" =
       list("min" = ~ min(disease_duration, na.rm = T),
            "max" = ~ max(disease_duration, na.rm = T),
            "mean (sd)" = ~ qwraps2::mean_sd(disease_duration, na_rm = T, show_n = "never")),
        "EDSS at baseline " =
       list("min" = ~ min(EDSSbaseline, na.rm = T),
            "max" = ~ max(EDSSbaseline, na.rm = T),
            "mean (sd)" = ~ qwraps2::mean_sd(EDSSbaseline, na_rm = T, show_n = "never")),
       "Treatment" =
         list("Yes" = ~ qwraps2::n_perc0(DMT_YesNoNA == "YES", na_rm = T, show_symbol = T),
              "No"  = ~ qwraps2::n_perc0(DMT_YesNoNA == "NO", na_rm = T, show_symbol = T),
              "Missing" = ~ sum(is.na(DMT_YesNoNA)))
       )

print(summary_table(dplyr::group_by(df.bl, subtype), table1),
      rtitle = "Sample Chararcteristics",
      cnames = c("Controls", "CIS", "RRMS", "SPMS", "PPMS"))
Sample Chararcteristics Controls CIS RRMS SPMS PPMS
N with follow-up               
   Yes 111 279 653 104 119
Gender               
   Female 82 (55%) 199 (67%) 453 (67%) 67 (60%) 52 (43%)
   Male 68 (45%) 97 (33%) 224 (33%) 44 (40%) 68 (57%)
Number of scans               
   min 1 1 1 1 1
   max 10 5 7 3 5
   mean (sd) 2.82 ± 1.90 2.44 ± 0.98 2.71 ± 1.05 2.25 ± 0.56 2.80 ± 1.07
Age at baseline scan (years)               
   min 23 15 18 30 19
   max 66 55 66 68 65
   mean (sd) 37.29 ± 9.96 33.01 ± 8.08 38.83 ± 9.68 50.11 ± 9.39 48.55 ± 10.04
Brain-predicted age at baseline scan (years)               
   min 14.5 13.7 7.4 40.2 31.0
   max 70 82 92 89 84
   mean (sd) 38.43 ± 11.12 37.60 ± 10.01 51.33 ± 13.32 67.36 ± 10.42 59.77 ± 10.90
Disease duration at baseline (years)               
   min Inf 0.0 0.0 2.5 1.0
   max -Inf 18 42 48 27
   mean (sd) NaN ± NA 0.52 ± 1.50 7.67 ± 7.31 17.44 ± 9.04 6.65 ± 5.63
EDSS at baseline               
   min Inf 0 0 3 2
   max -Inf 4.5 6.5 9.0 8.0
   mean (sd) NaN ± NA 1.36 ± 1.02 2.12 ± 1.40 5.83 ± 1.20 5.10 ± 1.32
Treatment               
   Yes 0 (NaN%) 60 (20%) 356 (56%) 51 (50%) 8 (7%)
   No 0 (NaN%) 234 (80%) 285 (44%) 52 (50%) 104 (93%)
   Missing 150 2 36 8 8

Length of follow-up

Get length of follow-up from longitudinal database.

df %>%
  filter(NoScans >= 2) %>%
  group_by(PatientID) %>%
  slice(which.max(interval)) %>%
  # top_n(n = 1, wt = interval) %>%
  group_by(subtype) %>%
  dplyr::summarise(n = n(), mean = mean(interval), SD = sd(interval), min = min(interval), max = max(interval)) %>%
  kable(digits = 2) %>%
  kable_styling(full_width = F, position = "left")
subtype n mean SD min max
control 111 2.0 1.4 0.50 6.0
CIS 279 3.6 4.0 0.23 15.0
RRMS 653 3.6 3.1 0.45 15.0
SPMS 104 2.4 1.1 0.51 5.5
PPMS 119 2.9 1.6 0.83 6.0

Correlations between demographics and clinical variables

Use corrplot package.

cor.mat <- cbind(df.bl$age_at_scan, df.bl$disease_onset_age, df.bl$disease_duration, df.bl$EDSSatScan)
colnames(cor.mat) <- c("Age at scan", "Age at diagnosis", "Time since diagnosis", "EDSS at scan")
corrplot(cor(cor.mat, use = "pairwise"), type = "upper", method = "color", addCoef.col = T, tl.col = "black", diag = F)

Age histograms

p <- ggplot() + xlab("Age (years)") + xlim(c(18,90)) + theme_cowplot()
banc_plot <- p + geom_histogram(aes(x = age), fill = "darkgoldenrod2", binwidth = 2, data = banc) + labs(title = "Training data", subtitle = "Healthy participants")
ms_plot <- p + geom_histogram(aes(x = age_at_scan), fill = "firebrick",  binwidth = 2, data = subset(df.bl, df.bl$control0rest1 == "MS"))  + labs(title = "Test data", subtitle = "MS and CIS patients")
hc_plot <- p + geom_histogram(aes(x = age_at_scan), fill = "darkgreen", binwidth = 2, data = subset(df.bl, df.bl$control0rest1 == "control"))  + labs(title = "Test data", subtitle = "Healthy controls")
p2 <- cowplot::plot_grid(banc_plot, ms_plot, hc_plot, labels = "AUTO", nrow = 1)
p2

cowplot::save_plot("~/Work/Articles/Brain age/MS/figures/BANC_MAGNIMS_age_histograms.pdf", p2, base_asp = 2.5)

Baseline brain-age analysis

describeBy(df.bl$BrainPAD, df.bl$control0rest1, mat = T, digits = 3) # brain-PAD by MS patient vs. controls
##     item  group1 vars    n mean   sd median trimmed mad min max range
## X11    1 control    1  150  1.1  6.7   0.64     1.2 6.5 -17  22    39
## X12    2      MS    1 1204 10.9 10.2   9.44    10.2 9.7 -18  49    67
##      skew kurtosis   se
## X11 0.032    0.072 0.55
## X12 0.600    0.315 0.29

Evalute potential covariates

Distribution of scanner field strengths across study sites.

table(df.bl$Cohort, df.bl$field_strength) %>%
  kable() %>%
  kable_styling(full_width = F, position = "left", fixed_thead = T)
1.5T 3T
Amsterdam 193 0
Barcelona 62 90
UCL0 0 21
UCL1 148 6
UCL2 0 16
UCL3 0 147
UCL4 0 35
UCL5 50 0
UCL6 59 0
UCL7 34 0
Graz 174 0
Imperial 0 25
Milan 60 54
Rome 71 0
Siena 109 0
fit <- lm(BrainPAD ~ poly(age_at_baseline_scan1, 2) + gender + ICV + field_strength + Cohort, data = df.bl)
anova(fit)
## Analysis of Variance Table
## 
## Response: BrainPAD
##                                  Df Sum Sq Mean Sq F value Pr(>F)    
## poly(age_at_baseline_scan1, 2)    2    445     223    2.36 0.0948 .  
## gender                            1    741     741    7.86 0.0051 ** 
## ICV                               1     12      12    0.13 0.7223    
## field_strength                    1    693     693    7.34 0.0068 ** 
## Cohort                           14  17705    1265   13.40 <2e-16 ***
## Residuals                      1334 125886      94                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Hierarchical partitioning of brain-PAD

a <- lm(BrainPAD ~ poly(age_at_baseline_scan1, 2) + gender + wb_vol_ratio_icv +  field_strength + Cohort, data = df.bl)
summary(a)
## 
## Call:
## lm(formula = BrainPAD ~ poly(age_at_baseline_scan1, 2) + gender + 
##     wb_vol_ratio_icv + field_strength + Cohort, data = df.bl)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -33.64  -4.49  -0.23   4.18  32.06 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      142.658      3.724   38.31  < 2e-16 ***
## poly(age_at_baseline_scan1, 2)1 -164.500      8.746  -18.81  < 2e-16 ***
## poly(age_at_baseline_scan1, 2)2  -11.913      7.075   -1.68  0.09247 .  
## gendermale                        -1.159      0.403   -2.87  0.00414 ** 
## wb_vol_ratio_icv                -164.464      4.565  -36.03  < 2e-16 ***
## field_strength3T                  -1.744      0.826   -2.11  0.03491 *  
## CohortBarcelona                   -3.691      0.917   -4.03  6.0e-05 ***
## CohortUCL0                         0.774      1.803    0.43  0.66778    
## CohortUCL1                        -2.111      0.784   -2.69  0.00716 ** 
## CohortUCL2                        -0.855      2.007   -0.43  0.67018    
## CohortUCL3                        -3.075      1.124   -2.74  0.00630 ** 
## CohortUCL4                        -5.467      1.521   -3.60  0.00034 ***
## CohortUCL5                        -1.756      1.116   -1.57  0.11591    
## CohortUCL6                        -1.745      1.034   -1.69  0.09173 .  
## CohortUCL7                        -7.367      1.318   -5.59  2.8e-08 ***
## CohortGraz                        -4.994      0.770   -6.49  1.2e-10 ***
## CohortImperial                    -5.469      1.692   -3.23  0.00126 ** 
## CohortMilan                        4.660      0.919    5.07  4.6e-07 ***
## CohortRome                        -0.604      0.968   -0.62  0.53284    
## CohortSiena                        4.164      0.852    4.89  1.1e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.9 on 1334 degrees of freedom
## Multiple R-squared:  0.561,  Adjusted R-squared:  0.555 
## F-statistic: 89.9 on 19 and 1334 DF,  p-value: <2e-16

I = independent variance; J = joint variance between >= 2 variables.

h.p <- hier.part(y = df.bl$BrainPAD, 
                 xcan = df.bl[c("age_at_baseline_scan1","gender", "wb_vol_ratio_icv", "field_strength", "Cohort")], 
                 gof = "Rsqu",
                 barplot = FALSE)
round(h.p$IJ, 3) %>%
  kable() %>%
  kable_styling(full_width = F, position = "left")
I J Total
age_at_baseline_scan1 0.06 -0.06 0.00
gender 0.00 0.00 0.00
wb_vol_ratio_icv 0.39 -0.04 0.34
field_strength 0.01 0.00 0.00
Cohort 0.10 0.01 0.12

Percentage of the explained variance.

round(h.p$I.perc, 1) %>%
  kable() %>%
  kable_styling(full_width = F, position = "left")
I
age_at_baseline_scan1 10.0
gender 0.7
wb_vol_ratio_icv 69.3
field_strength 1.1
Cohort 18.8
bar.colors <- c("darkgoldenrod2", "firebrick")
barplot(t(as.matrix(h.p$IJ[,1:2])),
        names.arg = c("Age", "Sex", "NBV", "Field strength", "Cohort"),
        col = bar.colors,
         ylim = c(0,0.4),
        xlab = "Model predictors",
        ylab = expression(paste("Brain-PAD variance explained R"^"2")))
legend("topright", c("Unique variance", "Shared variance"), fill = bar.colors, bty = "n", title = "Legend")

Results suggest that age, age^2, normalised brain volume (AKA wb_vol_ratio_icv), gender, Cohort and field strength are appropriate covariates.

Predict brain-PAD based on group

Function to run a linear mixed effect (LME) model adjusting for: fixed effects of age, age^2, gender and field strength; random effects of cohort.

run_lm <- function(var, data) {
  m1 <- lmer(BrainPAD ~ data[[var]] +
               poly(age_at_scan, 2) +
               gender +
               field_strength +
               wb_vol_ratio_icv +
               (1|Cohort),
             data = data,
             control = lmerControl(optimizer = "Nelder_Mead"))
  return(m1)
}

Main effect of group (MS vs. controls)

fit <- run_lm("control0rest1", df.bl)
summary(fit)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +  
##     wb_vol_ratio_icv + (1 | Cohort)
##    Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 9035
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -5.138 -0.642 -0.033  0.619  4.726 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Cohort   (Intercept)  8.64    2.94    
##  Residual             45.86    6.77    
## Number of obs: 1354, groups:  Cohort, 15
## 
## Fixed effects:
##                       Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)            129.672      3.907 1183.794   33.19  < 2e-16 ***
## data[[var]]MS            6.043      0.770 1265.898    7.85  8.8e-15 ***
## poly(age_at_scan, 2)1 -166.368      8.526 1347.000  -19.51  < 2e-16 ***
## poly(age_at_scan, 2)2  -15.356      6.924 1342.944   -2.22    0.027 *  
## gendermale              -0.936      0.396 1338.609   -2.37    0.018 *  
## field_strength3T        -1.855      0.735  408.407   -2.52    0.012 *  
## wb_vol_ratio_icv      -156.850      4.552 1346.999  -34.46  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) d[[]]M p(__,2)1 p(__,2)2 gndrml fld_3T
## dat[[vr]]MS -0.366                                       
## ply(g__,2)1 -0.430 -0.040                                
## ply(g__,2)2  0.049 -0.063  0.010                         
## gendermale  -0.240  0.070  0.121    0.049                
## fld_strng3T -0.026  0.012 -0.009   -0.025   -0.051       
## wb_vl_rt_cv -0.961  0.212  0.466   -0.042    0.209 -0.051

Estimated marginal means

Generate EMMs for all MS/CIS and healthy controls. Need to re-fit same model as above so that lme4 object can be read by emmeans.

fit <- lmer(BrainPAD ~ control0rest1 +
               poly(age_at_scan, 2) +
               gender +
               field_strength +
              wb_vol_ratio_icv +
               (1|Cohort),
             data = df.bl,
             control = lmerControl(optimizer = "Nelder_Mead"))
emmeans(object = fit, pairwise ~ control0rest1)
## $emmeans
##  control0rest1 emmean   SE df lower.CL upper.CL
##  control          4.2 1.04 37      2.1      6.4
##  MS              10.3 0.83 16      8.5     12.1
## 
## Results are averaged over the levels of: gender, field_strength 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast     estimate   SE   df t.ratio p.value
##  control - MS       -6 0.77 1261 -7.800  <.0001 
## 
## Results are averaged over the levels of: gender, field_strength 
## Degrees-of-freedom method: kenward-roger

Meta-analysis looking at all the separate cohorts with MS/CIS patients and controls

Check which cohorts contain healthy controls and patients.

table(df.bl$subtype, df.bl$Cohort) %>%
  kable() %>%
  kable_styling(full_width = F, position = "left")
Amsterdam Barcelona UCL0 UCL1 UCL2 UCL3 UCL4 UCL5 UCL6 UCL7 Graz Imperial Milan Rome Siena
control 0 0 0 0 0 82 16 17 17 10 0 8 0 0 0
CIS 4 84 0 75 0 0 0 0 0 24 95 0 11 1 2
RRMS 131 63 21 77 0 28 0 33 0 0 69 16 66 66 107
SPMS 36 5 0 2 7 23 0 0 0 0 9 1 24 4 0
PPMS 22 0 0 0 9 14 19 0 42 0 1 0 13 0 0

Create data.frame with summary data appropriate for meta-analysis.

tmp0 <- df.bl %>% group_by(Cohort, control0rest1) %>% dplyr::summarise(length(control0rest1))
meta.cohorts <- as.list(tmp0[tmp0$control0rest1 == "control",1])$Cohort
tmp <- df.bl %>%
  filter(str_detect(Cohort, paste(meta.cohorts, collapse = "|")))
tmp1 <- tmp %>% group_by(Cohort, control0rest1) %>% dplyr::summarise(n = n(), Mean = mean(BrainPAD), SD  =  sd(BrainPAD))
tmp2 <- dcast(tmp1, Cohort ~ control0rest1, value.var = "n")
tmp3 <- dcast(tmp1, Cohort ~ control0rest1, value.var = "Mean")
tmp4 <- dcast(tmp1, Cohort ~ control0rest1, value.var = "SD")
names(tmp2) <- c("Cohort", "control_n", "MS_n")
names(tmp3) <- c("Cohort", "control_mean", "MS_mean")
names(tmp4) <- c("Cohort", "control_sd", "MS_sd")
tmp1x <- tmp %>% group_by(Cohort) %>% dplyr::summarise(pooled.SD  =  sd(BrainPAD))
meta.df <- join_all(list(tmp2,tmp3,tmp4, tmp1x), by = 'Cohort', type = 'left')
rm(list = ls(pattern = 'tmp*')) # remove temporary data frames

Fit a random-effects meta-analysis using REML

Using the metafor package.

meta.df <- escalc(m1i = control_mean, sd1i = pooled.SD, n1i = control_n, m2i = MS_mean, sd2i = pooled.SD, n2i = MS_n, measure = "MD", data = meta.df, digits = 2)
meta.results <- rma(yi, vi, data = meta.df, method = "REML")
print(meta.results)
## 
## Random-Effects Model (k = 6; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 11.8177 (SE = 13.0982)
## tau (square root of estimated tau^2 value):      3.4377
## I^2 (total heterogeneity / total variability):   59.03%
## H^2 (total variability / sampling variability):  2.44
## 
## Test for Heterogeneity: 
## Q(df = 5) = 13.4832, p-val = 0.0192
## 
## Model Results:
## 
## estimate      se     zval    pval     ci.lb    ci.ub     
##  -9.4543  1.8649  -5.0695  <.0001  -13.1095  -5.7990  ***
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(meta.results)
## 
##        estimate  ci.lb   ci.ub
## tau^2   11.8177 0.2530 84.1148
## tau      3.4377 0.5030  9.1714
## I^2(%)  59.0288 2.9923 91.1149
## H^2      2.4407 1.0308 11.2548

Forest plot of results

plot.forest %<a-% {
forest(meta.results, ilab = cbind(meta.df$MS_n, meta.df$control_n), ilab.xpos = c(-30,-23), slab = meta.df$Cohort, digits = 1, xlab = "MS vs. Healthy control group mean difference", steps = 6, col = "red", cex = 1.25, pch = 22, bg = "blue"); text(c(-40, -30, -23), 7.6, c("Cohort", "MS n", "HC n"), font = 2, cex = 1.25)
}
plot.forest

cairo_pdf("plots/forest_plot.pdf", 6,5)
plot.forest
dev.off()
## quartz_off_screen 
##                 2

Linear regression analysis in cohort UCL3

Includes covariates: age, age^2, gender.

summary(lm(BrainPAD ~ control0rest1 + poly(age_at_baseline_scan1, 2) + gender + wb_vol_ratio_icv, data = subset(df.bl, df.bl$Cohort == "UCL3")))
## 
## Call:
## lm(formula = BrainPAD ~ control0rest1 + poly(age_at_baseline_scan1, 
##     2) + gender + wb_vol_ratio_icv, data = subset(df.bl, df.bl$Cohort == 
##     "UCL3"))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -20.74  -3.82   0.12   4.08  14.32 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       98.758      9.912    9.96  < 2e-16 ***
## control0rest1MS                    9.296      1.331    6.98  1.0e-10 ***
## poly(age_at_baseline_scan1, 2)1  -46.263      7.574   -6.11  9.3e-09 ***
## poly(age_at_baseline_scan1, 2)2    0.207      6.350    0.03     0.97    
## gendermale                        -0.740      1.103   -0.67     0.50    
## wb_vol_ratio_icv                -121.374     12.063  -10.06  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.3 on 141 degrees of freedom
## Multiple R-squared:  0.672,  Adjusted R-squared:  0.661 
## F-statistic: 57.9 on 5 and 141 DF,  p-value: <2e-16

Brain-PAD by MS subtype

Using a linear mixed effect (LME) model, to compare subtypes. This analysis excluded controls. Adjusting for age, age^2, gender, field strength, normalised brain volume and cohort.

subtypes <- run_lm("subtype", subset(df.bl, df.bl$subtype != "control"))
summary(subtypes)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +  
##     wb_vol_ratio_icv + (1 | Cohort)
##    Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 7970
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.860 -0.652 -0.013  0.607  4.523 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Cohort   (Intercept)  5.35    2.31    
##  Residual             43.86    6.62    
## Number of obs: 1204, groups:  Cohort, 15
## 
## Fixed effects:
##                       Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)            126.193      3.956 1133.060   31.90  < 2e-16 ***
## data[[var]]RRMS          5.122      0.583  927.421    8.78  < 2e-16 ***
## data[[var]]SPMS          6.585      0.938 1127.828    7.02  3.8e-12 ***
## data[[var]]PPMS          4.507      1.069  370.482    4.21  3.2e-05 ***
## poly(age_at_scan, 2)1 -172.683      8.698 1193.855  -19.85  < 2e-16 ***
## poly(age_at_scan, 2)2  -16.532      6.884 1189.035   -2.40    0.016 *  
## gendermale              -0.764      0.415 1188.498   -1.84    0.066 .  
## field_strength3T        -0.626      0.713  230.278   -0.88    0.381    
## wb_vol_ratio_icv      -150.823      4.756 1194.873  -31.71  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) d[[]]R d[[]]S d[[]]P p(__,2)1 p(__,2)2 gndrml fld_3T
## dt[[v]]RRMS -0.282                                                     
## dt[[v]]SPMS -0.334  0.560                                              
## dt[[v]]PPMS -0.194  0.498  0.478                                       
## ply(g__,2)1 -0.365 -0.063 -0.182 -0.212                                
## ply(g__,2)2  0.056  0.051 -0.108 -0.096  0.055                         
## gendermale  -0.218  0.010 -0.007 -0.104  0.129    0.059                
## fld_strng3T -0.073  0.202  0.058  0.120 -0.021   -0.010   -0.053       
## wb_vl_rt_cv -0.974  0.162  0.258  0.098  0.398   -0.060    0.197 -0.016
anova(subtypes)
## Type III Analysis of Variance Table with Satterthwaite's method
##                      Sum Sq Mean Sq NumDF DenDF F value Pr(>F)    
## data[[var]]            3701    1234     3   773   28.13 <2e-16 ***
## poly(age_at_scan, 2)  17365    8683     2  1191  197.95 <2e-16 ***
## gender                  149     149     1  1188    3.39  0.066 .  
## field_strength           34      34     1   230    0.77  0.381    
## wb_vol_ratio_icv      44108   44108     1  1195 1005.61 <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Brain-PAD estimated marginal means for subtypes

Generate EMMs for all MS subtypes.

fit_subtype <- lmer(BrainPAD ~ subtype +
               poly(age_at_scan, 2) +
               gender +
               field_strength +
              wb_vol_ratio_icv +
               (1|Cohort),
            # data = subset(df.bl, df.bl$control0rest1 != "control"),
            data = df.bl,
             control = lmerControl(optimizer = "Nelder_Mead"))
emmeans(object = fit_subtype, pairwise ~ subtype)
## $emmeans
##  subtype emmean   SE df lower.CL upper.CL
##  control    4.3 0.90 50      2.5      6.1
##  CIS        6.4 0.80 33      4.8      8.1
##  RRMS      11.6 0.69 20     10.1     13.0
##  SPMS      13.0 0.95 72     11.1     14.9
##  PPMS      10.4 0.97 64      8.5     12.4
## 
## Results are averaged over the levels of: gender, field_strength 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast       estimate   SE   df t.ratio p.value
##  control - CIS      -2.2 0.92  724 -2.400  0.1300 
##  control - RRMS     -7.3 0.81  868 -9.000  <.0001 
##  control - SPMS     -8.7 1.01 1227 -8.600  <.0001 
##  control - PPMS     -6.2 0.95 1296 -6.500  <.0001 
##  CIS - RRMS         -5.1 0.58 1100 -8.900  <.0001 
##  CIS - SPMS         -6.6 0.92 1295 -7.200  <.0001 
##  CIS - PPMS         -4.0 1.01  659 -4.000  <.0001 
##  RRMS - SPMS        -1.4 0.76 1342 -1.900  0.3200 
##  RRMS - PPMS         1.1 0.87  700  1.300  0.6900 
##  SPMS - PPMS         2.6 0.99 1110  2.600  0.0700 
## 
## Results are averaged over the levels of: gender, field_strength 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 5 estimates

Save table for importing into Word

x <- as.data.frame(emmeans(object = fit_subtype, pairwise ~ subtype)$contrasts)
write.table(format(x, digits = 3), file = "~/Work/Articles/Brain age/MS/Annals Neurology/pairwise_brain_PAD.txt", sep = ",", quote = F, row.names = F)

Plot the EMMs and confidence intervals

plot_model(fit_subtype, type = "pred", terms = c("subtype")) +
  theme_cowplot()

Brain-PAD boxplot by MS subtype

# calculate Ns
control_n <- with(df.bl, table(subtype))["control"]
cis_n <- with(df.bl, table(subtype))["CIS"]
rrms_n <- with(df.bl, table(subtype))["RRMS"]
spms_n <- with(df.bl, table(subtype))["SPMS"]
ppms_n <- with(df.bl, table(subtype))["PPMS"]

plot.pryr %<a-% {
with(df.bl, ehplot(BrainPAD, groups = subtype,
                   ylim = c(-20,52), pch = as.numeric(subtype) + 20, bg = ms.palette[as.numeric(subtype)], box = T, offset = 0.1, intervals = 50,
                   xlab = "Groups", ylab = "Brain-PAD (years)", main = "Baseline Brain-PAD by study group")) + 
  abline(0,0, lty = 2) +
  text(1,51, paste("N = ", control_n, sep = ""), cex = 0.8) +
  text(2,51, paste("N = ", cis_n, sep = ""), cex = 0.8) +
  text(3,51, paste("N = ", rrms_n, sep = ""), cex = 0.8) +
  text(4,51, paste("N = ", spms_n, sep = ""), cex = 0.8) +
  text(5,51, paste("N = ", ppms_n, sep = ""), cex = 0.8)
}
plot.pryr

## integer(0)
cairo_pdf(filename = "~/Work/Brain ageing/Collaborations/MS/plots/MS_UCL_MAGNIMS_combined_group_Brain-PAD.pdf", width = 8, height = 8)
plot.pryr
## integer(0)
dev.off()
## quartz_off_screen 
##                 2

Brain-PAD boxplot by cohort and by MS subtype

Also, Estimate marginal means plot

Uses sjPlot to calculated EMMs from linear regression model

p1 <- ggplot(df.bl, aes(x = subtype, y = BrainPAD, fill = subtype)) +
  geom_boxplot(outlier.size = 0.75) +
  facet_wrap(df.bl$Cohort, nrow = 1) +
  geom_hline(aes(yintercept = 0), lty = 2) +
  theme_cowplot(font_size = 6) +
  labs(x = "MS subtype", y = "Brain-PAD (years)") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none", axis.title.x = element_text(vjust = -3), axis.title.y = element_text(vjust = 3)) +
  scale_fill_manual(values = ms.palette)

fit <- lm(BrainPAD ~ control0rest1 + poly(age_at_baseline_scan1, 2) + gender + field_strength + wb_vol_ratio_icv + Cohort, data = df.bl)
p2 <- plot_model(fit, type = "emm", terms = c("Cohort", "control0rest1"), colors = "bw", show.legend = F) +
  labs(x = "Cohort", y = "Brain-PAD (years)") +
  theme_cowplot(font_size = 6) +
  theme(axis.title.x = element_text(vjust = -3), axis.title.y = element_text(vjust = 3))

p3 <- plot_grid(list(p1,p2), tags = T, margin = c(0.5,0.5,0.5,0.5))

ggsave("~/Work/Brain ageing/Collaborations/MS/plots/brainPAD_cohort_subtype_boxplot_EMM_plot.pdf", width = 15, height = 10, useDingbats = FALSE)
ggsave(plot = p3, "~/Work/Articles/Brain age/MS/figures/brainPAD_cohort_subtype_boxplot_EMM_plot.tif", width = 170, height = 120, device = "tiff", units = "mm")

Brain-PAD boxplot by MS subtype in cohort UCL3 only

# calculate Ns
C3.bl <- df.bl %>% filter(Cohort == "UCL3") 
C3.bl$subtype <- factor(C3.bl$subtype)
control_n <- with(C3.bl, table(subtype))["control"]
rrms_n <- with(C3.bl, table(subtype))["RRMS"]
spms_n <- with(C3.bl, table(subtype))["SPMS"]
ppms_n <- with(C3.bl, table(subtype))["PPMS"]

plot.pryr %<a-% {
  with(C3.bl, ehplot(BrainPAD, groups = subtype, ylim = c(-20,52), pch = as.numeric(subtype) + 20, bg = ms.palette[-2][as.numeric(subtype)], box = T, offset = 0.1, intervals = 50, xlab = "Groups", ylab = "Brain-PAD (years)", main = "Baseline Brain-PAD by study group")) + 
  abline(0,0, lty = 2) +
  text(1,51, paste("N = ", control_n, sep = ""), cex = 1.2) +
  text(2,51, paste("N = ", rrms_n, sep = ""), cex = 1.2) +
  text(3,51, paste("N = ", spms_n, sep = ""), cex = 1.2) +
  text(4,51, paste("N = ", ppms_n, sep = ""), cex = 1.2)
}
plot.pryr

## integer(0)
cairo_pdf("~/Work/Brain ageing/Collaborations/MS/plots/MS_cohort_C3_Brain-PAD.pdf", 8,8)
plot.pryr
## integer(0)
dev.off()
## quartz_off_screen 
##                 2

Cohort UCL3 statistics

Use OLS regression, no random effects necessary.

fit_UCL3 <- lm(BrainPAD ~ subtype +
                 poly(age_at_scan, 2) +
                 gender +
                 wb_vol_ratio_icv,
               data = subset(df.bl, df.bl$Cohort == "UCL3"))
anova(fit_UCL3)
## Analysis of Variance Table
## 
## Response: BrainPAD
##                       Df Sum Sq Mean Sq F value Pr(>F)    
## subtype                3   7251    2417   60.49 <2e-16 ***
## poly(age_at_scan, 2)   2    493     246    6.17 0.0027 ** 
## gender                 1    147     147    3.67 0.0573 .  
## wb_vol_ratio_icv       1   3639    3639   91.07 <2e-16 ***
## Residuals            139   5553      40                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(object = fit_UCL3, pairwise ~ subtype)
## $emmeans
##  subtype emmean   SE  df lower.CL upper.CL
##  control    3.0 1.00 139      1.1      5.0
##  RRMS      11.7 1.34 139      9.0     14.3
##  SPMS      13.7 1.70 139     10.3     17.0
##  PPMS      12.7 1.82 139      9.1     16.3
## 
## Results are averaged over the levels of: gender 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast       estimate   SE  df t.ratio p.value
##  control - RRMS     -8.6 1.55 139 -5.600  <.0001 
##  control - SPMS    -10.6 1.95 139 -5.500  <.0001 
##  control - PPMS     -9.7 2.00 139 -4.900  <.0001 
##  RRMS - SPMS        -2.0 1.93 139 -1.000  0.7400 
##  RRMS - PPMS        -1.1 2.15 139 -0.500  0.9600 
##  SPMS - PPMS         0.9 2.21 139  0.400  0.9800 
## 
## Results are averaged over the levels of: gender 
## P value adjustment: tukey method for comparing a family of 4 estimates

Brain-PAD by subtype descriptive statistics

 # brain-PAD by MS patient subtypes and controls
with(df.bl, describeBy(BrainPAD, subtype, mat = T, digits = 1)[-1]) %>%
  kable() %>%
  kable_styling(full_width = F, position = "left")
group1 vars n mean sd median trimmed mad min max range skew kurtosis se
X11 control 1 150 1.1 6.7 0.6 1.2 6.5 -16.5 22 39 0.0 0.1 0.6
X12 CIS 1 296 4.6 7.2 4.3 4.3 6.5 -13.1 32 44 0.5 0.9 0.4
X13 RRMS 1 677 12.5 10.0 11.1 12.0 9.4 -18.2 49 67 0.5 0.3 0.4
X14 SPMS 1 111 17.3 10.5 16.9 17.1 11.1 -6.7 44 51 0.2 -0.4 1.0
X15 PPMS 1 120 11.2 10.3 10.3 10.7 10.6 -5.3 46 52 0.5 0.1 0.9

Lesion filling

To establish whether using the FSL lesion filling software influences brain-predicted age values. This analysis was conducted only in UCL patients (n = 575).

with(lesion_df, describeBy(filled_brain_age, subtype, mat = T)[-1,-1]) %>%
  kable() %>%
  kable_styling()
group1 vars n mean sd median trimmed mad min max range skew kurtosis se
X12 CIS 1 8 42 6.2 43 42 7.9 34 50 15 -0.09 -1.98 2.18
X13 RRMS 1 382 55 11.5 55 55 12.1 24 88 63 -0.03 -0.44 0.59
X14 SPMS 1 119 65 9.2 66 65 8.4 40 82 41 -0.74 -0.03 0.85
X15 PPMS 1 66 62 10.1 60 62 10.6 44 84 40 0.46 -0.61 1.25

Correlation between brain-predicted age from filled and unfilled images: Pearson’s r = 0.99. Median absolute error (MAE) between brain-predicted age from filled and unfilled images = 0.37 years. Mean difference between brain-predicted age from filled and unfilled images = 0.28 ± 1.29 years.

Lesion filled vs. unfilled scatterplot

ggplot(data = subset(lesion_df, lesion_df$subtype != "control"), aes(x = brain_age, y = filled_brain_age)) +
  geom_abline(slope = 1) +
  geom_point(aes(shape = subtype, fill = subtype), size = 2) +
  labs(x = "Unfilled brain-predicted age (years)", y = "Lesion-filled brain-predicted age (years)") +
  xlim(c(20,90)) +
  scale_fill_manual(values = ms.palette[-1]) +
  scale_shape_manual(values = c(22:25)) +
  theme_cowplot() + theme(legend.position = c(0.8, 0.2))
## Warning: Removed 1441 rows containing missing values (geom_point).

ggsave("~/Work/Brain ageing/Collaborations/MS/plots/lesion_filling_brain_age_plot.pdf", width = 5, height = 5, useDingbats = FALSE)
## Warning: Removed 1441 rows containing missing values (geom_point).

Lesion filled vs. unfilled Bland-Altman plot

mean.diff <- mean(lesion_df$brain_age - lesion_df$filled_brain_age, na.rm = T)
sd.diff <- sd(lesion_df$brain_age - lesion_df$filled_brain_age, na.rm = T)
ggplot(data = subset(lesion_df, lesion_df$subtype != "control"), aes(x = ((brain_age + filled_brain_age)/2), y = brain_age - filled_brain_age)) +
  geom_abline(slope = 0, lty = 2) +
  geom_point(aes(shape = subtype, fill = subtype), size = 2) +
  geom_hline(yintercept = mean.diff, color = "darkgoldenrod1", lwd = 1) + # mean difference line
  geom_hline(yintercept = mean.diff + 1.96*sd.diff, color = "darkgoldenrod2", lty = 2) + # upper 95% line
  geom_hline(yintercept = mean.diff - 1.96*sd.diff, color = "darkgoldenrod2", lty = 2) + # lower 95% line
  # geom_smooth(method = "lm", level = 0.95, color = "black", lwd = 0.3) +
  labs(x = "Mean of filled/unfilled brain-predicted age (years)", y = "Unfilled - Lesion-filled brain-predicted age (years)") +
  # ylim(c(-20,20)) +
  scale_fill_manual(values = ms.palette[-1]) +
  scale_shape_manual(values = c(22:25)) +
  theme_cowplot() + theme(legend.position = c(0.1, 0.8)) +
  annotate("text", x = 25, y = mean.diff + 1.96*sd.diff + 0.5, label = "+1.96*SD", color = "darkgoldenrod2") +
  annotate("text", x = 25, y = mean.diff - 1.96*sd.diff - 0.5, label = "-1.96*SD", color = "darkgoldenrod2") +
  annotate("text", x = 27.5, y = mean.diff + 0.8, label = "Mean difference", color = "darkgoldenrod2")
## Warning: Removed 1441 rows containing missing values (geom_point).

ggsave("~/Work/Brain ageing/Collaborations/MS/plots/lesion_filling_brain_age_BA_plot.pdf", width = 8, height = 5, useDingbats = FALSE)
## Warning: Removed 1441 rows containing missing values (geom_point).

Correlates of brain-PAD at baseline

EDSS score, an index of disability

LME model accounting for fixed effects of age at baseline, age^2, gender, field strength, normalised brain volume and random effects of Cohort.

summary(run_lm("EDSSbaseline", df.bl))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +  
##     wb_vol_ratio_icv + (1 | Cohort)
##    Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 7813
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.989 -0.634 -0.047  0.611  4.819 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Cohort   (Intercept)  8.43    2.90    
##  Residual             45.05    6.71    
## Number of obs: 1174, groups:  Cohort, 14
## 
## Fixed effects:
##                       Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)            133.865      4.122 1061.648   32.47  < 2e-16 ***
## data[[var]]              0.637      0.142 1116.023    4.50  7.6e-06 ***
## poly(age_at_scan, 2)1 -186.531      9.346 1165.039  -19.96  < 2e-16 ***
## poly(age_at_scan, 2)2  -25.621      7.329 1160.993   -3.50  0.00049 ***
## gendermale              -0.958      0.422 1159.898   -2.27  0.02328 *  
## field_strength3T        -1.790      0.734  396.618   -2.44  0.01512 *  
## wb_vol_ratio_icv      -156.427      4.924 1165.186  -31.77  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) dt[[]] p(__,2)1 p(__,2)2 gndrml fld_3T
## data[[var]] -0.374                                       
## ply(g__,2)1 -0.333 -0.224                                
## ply(g__,2)2  0.082 -0.127  0.052                         
## gendermale  -0.221  0.007  0.108    0.049                
## fld_strng3T -0.035  0.031 -0.012   -0.028   -0.051       
## wb_vl_rt_cv -0.972  0.290  0.366   -0.076    0.196 -0.041

When predicting brain-PAD in a LME model, the effect of EDSS at baseline beta = 0.64, 95% CI = 0.36, 0.91, p = 8e-06.

Testing for a polynomial fit of EDSS baseline

fit.edss.poly <- lmer(BrainPAD ~ 
                        poly(EDSSbaseline,2) +
                        poly(age_at_scan, 2) +
                        gender +
                        field_strength +
                        wb_vol_ratio_icv +
                        (1|Cohort),
                      data = subset(df.bl, !is.na(df.bl$EDSSbaseline)),
                      control = lmerControl(optimizer = "Nelder_Mead"))
summary(fit.edss.poly)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ poly(EDSSbaseline, 2) + poly(age_at_scan, 2) + gender +  
##     field_strength + wb_vol_ratio_icv + (1 | Cohort)
##    Data: subset(df.bl, !is.na(df.bl$EDSSbaseline))
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 7797
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.926 -0.628 -0.053  0.607  4.821 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Cohort   (Intercept)  8.42    2.90    
##  Residual             44.99    6.71    
## Number of obs: 1174, groups:  Cohort, 14
## 
## Fixed effects:
##                        Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)             135.338      3.993 1050.157   33.89  < 2e-16 ***
## poly(EDSSbaseline, 2)1   43.388      9.451 1113.921    4.59  4.9e-06 ***
## poly(EDSSbaseline, 2)2  -11.381      7.043 1165.171   -1.62  0.10637    
## poly(age_at_scan, 2)1  -175.130      8.764 1164.110  -19.98  < 2e-16 ***
## poly(age_at_scan, 2)2   -23.270      6.916 1159.298   -3.36  0.00079 ***
## gendermale               -0.954      0.421 1158.860   -2.26  0.02371 *  
## field_strength3T         -1.799      0.733  395.462   -2.45  0.01455 *  
## wb_vol_ratio_icv       -156.399      4.920 1164.186  -31.79  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) p(EDSS,2)1 p(EDSS,2)2 p(__,2)1 p(__,2)2 gndrml fld_3T
## pl(EDSS,2)1 -0.294                                                      
## pl(EDSS,2)2  0.002 -0.061                                               
## ply(g__,2)1 -0.363 -0.224      0.026                                    
## ply(g__,2)2  0.073 -0.122     -0.076      0.040                         
## gendermale  -0.228  0.007     -0.005      0.108    0.050                
## fld_strng3T -0.033  0.030      0.008     -0.012   -0.029   -0.051       
## wb_vl_rt_cv -0.975  0.289     -0.004      0.367   -0.076    0.196 -0.042

Non-parametric correlation between brain-PAD and EDSS at baseline

with(df.bl, cor.test(BrainPAD, EDSSbaseline, method = "spearman"))
## Warning in cor.test.default(BrainPAD, EDSSbaseline, method = "spearman"):
## Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  BrainPAD and EDSSbaseline
## S = 2e+08, p-value <2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##  rho 
## 0.26

Plot EDSS histogram by subtype

ggplot(subset(df.bl, !is.na(df.bl$EDSSbaseline)), aes(EDSSbaseline, fill = subtype)) +
  scale_fill_manual(values = ms.palette[-1]) +
  geom_histogram(bins = 20) +
  facet_wrap(~ subtype) +
  theme_cowplot()

Test for interaction between subtype and EDSS on brain-PAD

fit.edss <- lmer(BrainPAD ~ EDSSbaseline * subtype + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control"))
round(as.matrix(anova(fit.edss)["EDSSbaseline:subtype",]),3) %>% 
  kable(digits = 2) %>%
  kable_styling()
Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
EDSSbaseline:subtype 138 46 3 1158 1.1 0.36

Use simple slopes from jtools to extract adjusted slopes for each subtype. Need to fit model without age^2 as poly() is incompatible with sim_slopes().

fit.edss2 <- lmer(BrainPAD ~ EDSSbaseline * subtype + age_at_baseline_scan1 + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control"))
sim_slopes(fit.edss2, pred = "EDSSbaseline", modx = "subtype", johnson_neyman = F)
## SIMPLE SLOPES ANALYSIS 
## 
## Slope of EDSSbaseline when subtype = CIS: 
##  Est. S.E.   df    p
##  0.15 0.40 0.37 0.71
## 
## Slope of EDSSbaseline when subtype = RRMS: 
##  Est. S.E.   df    p
##  0.79 0.21 3.78 0.00
## 
## Slope of EDSSbaseline when subtype = SPMS: 
##  Est. S.E.   df    p
##  0.20 0.53 0.38 0.71
## 
## Slope of EDSSbaseline when subtype = PPMS: 
##  Est. S.E.   df    p
##  0.27 0.46 0.58 0.56

Use interact_plot() from jtools to plot the adjusted slopes per group.

edss.plot <- interact_plot(fit.edss2, pred = "EDSSbaseline", modx = "subtype",
                           plot.points = T,
                           interval = T,
                           vary.lty = T,
                           facet.modx = T,
                           x.label = "EDSS", y.label = "Brain-PAD (years)",
                           point.size = 0.5, 
                           modx.labels = c("CIS", "RRMS", "SPMS", "PPMS")) +
  geom_hline(yintercept = 0, lty = 2) +
  scale_fill_manual(values = ms.palette[2:5], name = "MS subtype") +
  scale_color_manual(values = ms.palette[2:5], name = "MS subtype") +
  theme_cowplot(font_size = 9)
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/EDSS_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)

Age at diagnosis

LME accounting for fixed effects of age at baseline, age^2, gender, and random effects of Cohort and field strength. Exclude CIS patients and healthy controls.

summary(run_lm("disease_onset_age", subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS")))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +  
##     wb_vol_ratio_icv + (1 | Cohort)
##    Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 5947
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.729 -0.605 -0.035  0.611  4.234 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Cohort   (Intercept) 10.9     3.30    
##  Residual             42.7     6.53    
## Number of obs: 900, groups:  Cohort, 14
## 
## Fixed effects:
##                        Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            134.7440     4.0645  766.4048   33.15  < 2e-16 ***
## data[[var]]             -0.1611     0.0359  891.8587   -4.49  8.2e-06 ***
## poly(age_at_scan, 2)1 -117.9941    11.9737  892.6532   -9.85  < 2e-16 ***
## poly(age_at_scan, 2)2  -13.8667     6.6754  883.2836   -2.08   0.0381 *  
## gendermale              -0.1854     0.4704  881.6072   -0.39   0.6936    
## field_strength3T         2.7825     0.9648  189.7490    2.88   0.0044 ** 
## wb_vol_ratio_icv      -151.5788     5.2407  888.6360  -28.92  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) dt[[]] p(__,2)1 p(__,2)2 gndrml fld_3T
## data[[var]] -0.005                                       
## ply(g__,2)1 -0.278 -0.743                                
## ply(g__,2)2  0.047  0.036 -0.033                         
## gendermale  -0.217 -0.086  0.125    0.067                
## fld_strng3T -0.097  0.062 -0.055   -0.004   -0.042       
## wb_vl_rt_cv -0.926 -0.287  0.492   -0.060    0.199 -0.010

When predicting brain-PAD in a LME model, the effect of age at diagnosis at baseline beta = -0.16, 95% CI = -0.23, -0.09, p = 8e-06.

Test for interaction between subtype and age at diagnosis on brain-PAD:

fit.age <- lmer(BrainPAD ~ disease_onset_age * subtype + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS"))
round(as.matrix(anova(fit.age)["disease_onset_age:subtype",]),3) %>%
  kable(digits = 2) %>%
  kable_styling()
Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
disease_onset_age:subtype 51 26 2 877 0.6 0.55

Use simple slopes from jtools to extract adjusted slopes for each subtype.

fit.age2 <- lmer(BrainPAD ~ disease_onset_age * subtype + age_at_baseline_scan1 + gender + field_strength + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS"))
sim_slopes(fit.age2, pred = "disease_onset_age", modx = "subtype", johnson_neyman = F)
## SIMPLE SLOPES ANALYSIS 
## 
## Slope of disease_onset_age when subtype = RRMS: 
##   Est. S.E.    df    p
##  -0.37 0.05 -6.75 0.00
## 
## Slope of disease_onset_age when subtype = SPMS: 
##   Est. S.E.    df    p
##  -0.57 0.09 -6.39 0.00
## 
## Slope of disease_onset_age when subtype = PPMS: 
##   Est. S.E.    df    p
##  -0.51 0.10 -5.35 0.00
age.plot <- interact_plot(fit.age2, pred = "disease_onset_age", modx = "subtype",
                          plot.points = T, interval = T, vary.lty = T, facet.modx = T, 
                          x.label = "Age at clincial diagnosis (years)", y.label = "Brain-PAD (years)",
                          point.size = 0.5,
                          modx.labels = c("RRMS", "SPMS", "PPMS")) + 
  geom_hline(yintercept = 0, lty = 2) + 
  scale_fill_manual(values = ms.palette[3:5], name = "MS subtype") +
  scale_color_manual(values = ms.palette[3:5], name = "MS subtype") +
  theme_cowplot(font_size = 9)
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/diagnosis_age_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)

Time since diagnosis

LME accounting for fixed effects of age at baseline, age^2 gender, and random effects of Cohort and field strength. Exclude controls, CIS patients and anyone with a time since diagnosis = 0.

summary(run_lm("disease_duration_at_baseline_scan1", subset(df.bl, df.bl$subtype != "CIS" & df.bl$disease_duration > 0)))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +  
##     wb_vol_ratio_icv + (1 | Cohort)
##    Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 5677
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.781 -0.601 -0.036  0.614  4.185 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Cohort   (Intercept) 10.8     3.28    
##  Residual             43.4     6.59    
## Number of obs: 857, groups:  Cohort, 14
## 
## Fixed effects:
##                        Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            128.0279     4.3956  774.8252   29.13  < 2e-16 ***
## data[[var]]              0.1558     0.0364  849.2322    4.28  2.1e-05 ***
## poly(age_at_scan, 2)1 -165.1610     8.3879  849.5646  -19.69  < 2e-16 ***
## poly(age_at_scan, 2)2  -13.7866     6.7148  839.9938   -2.05   0.0404 *  
## gendermale              -0.1343     0.4841  838.7126   -0.28   0.7815    
## field_strength3T         2.6556     0.9757  181.8368    2.72   0.0071 ** 
## wb_vol_ratio_icv      -151.6503     5.3483  845.1505  -28.35  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) dt[[]] p(__,2)1 p(__,2)2 gndrml fld_3T
## data[[var]] -0.339                                       
## ply(g__,2)1 -0.266 -0.317                                
## ply(g__,2)2  0.058 -0.024 -0.008                         
## gendermale  -0.222  0.082  0.062    0.060                
## fld_strng3T -0.067 -0.053  0.009   -0.006   -0.043       
## wb_vl_rt_cv -0.971  0.283  0.303   -0.063    0.187 -0.014

When predicting brain-PAD in a LME model, the effect of time since diagnosis at baseline beta = 0.16, 95% CI = 0.08, 0.23, p = 2e-05.

Test for interaction between subtype and time since diagnosis on brain-PAD

fit.time <- lmer(BrainPAD ~ disease_duration_at_baseline_scan1 * subtype + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS" & df.bl$disease_duration > 0))
round(as.matrix(anova(fit.time)["disease_duration_at_baseline_scan1:subtype",]),3) %>%
  kable(digits = 2) %>%
  kable_styling()
Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
disease_duration_at_baseline_scan1:subtype 84 42 2 797 0.97 0.38

Use simple slopes from jtools to extract adjusted slopes for each subtype.

fit.time2 <- lmer(BrainPAD ~ disease_duration_at_baseline_scan1 * subtype + age_at_baseline_scan1 + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS" & df.bl$disease_duration > 0))
sim_slopes(fit.time2, pred = "disease_duration_at_baseline_scan1", modx = "subtype", johnson_neyman = F)
## SIMPLE SLOPES ANALYSIS 
## 
## Slope of disease_duration_at_baseline_scan1 when subtype = RRMS: 
##  Est. S.E.   df    p
##  0.19 0.04 4.29 0.00
## 
## Slope of disease_duration_at_baseline_scan1 when subtype = SPMS: 
##  Est. S.E.   df    p
##  0.08 0.07 1.06 0.29
## 
## Slope of disease_duration_at_baseline_scan1 when subtype = PPMS: 
##  Est. S.E.   df    p
##  0.02 0.13 0.12 0.90
time.plot <- interact_plot(fit.time2, pred = "disease_duration_at_baseline_scan1", modx = "subtype",
                           plot.points = T, interval = T, vary.lty = T, facet.modx = T, point.size = 0.5,
                           x.label = "Time since clinical diagnosis (years)", y.label = "Brain-PAD (years)",
                           modx.labels = c("RRMS", "SPMS", "PPMS")) +
  geom_hline(yintercept = 0, lty = 2) +
  scale_fill_manual(values = ms.palette[3:5], name = "MS subtype") +
  scale_color_manual(values = ms.palette[3:5], name = "MS subtype") +
  theme_cowplot(font_size = 9)
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/diagnosis_years_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)

Arrange EDSS, age at diagnosis and time since diagnosis plots

Use cowplot package.

cowplot::plot_grid(edss.plot, age.plot, time.plot, labels = "AUTO", ncol = 1)

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/clinical_brain-PAD_plots.pdf", height = 15, width = 8, useDingbats = FALSE)
ggsave(filename = "~/Work/Articles/Brain age/MS/figures/Fig2_clinical_brain-PAD_plots.tif", height = 160, width = 80, device = "tiff", units = "mm", dpi = 300)

Brain age adjusted for brain volumes

Function to get standardised beta coefficients from lmer, from stack exchange: https://stats.stackexchange.com/questions/123366/lmer-standardized-regression-coefficients

lm.beta.lmer <- function(mod) {
   b <- fixef(mod)[-1]
   sd.x <- apply(getME(mod,"X")[,-1],2,sd)
   sd.y <- sd(getME(mod,"y"))
   b*sd.x/sd.y
}

Baseline clinical measures, adjusting for whole-brain volume as ratio of ICV, along with standard covariates.

EDSS_baseline <- lm.beta.lmer(lmer(EDSSbaseline ~ BrainPAD + wb_vol_ratio_icv + poly(age_at_scan, 2) + gender + field_strength + (1|Cohort), data = df.bl, control = lmerControl(optimizer = "Nelder_Mead")))
Time_since_diagnosis <- lm.beta.lmer(model <- lmer(disease_duration ~ BrainPAD + wb_vol_ratio_icv + poly(age_at_scan, 2) + gender + field_strength + (1|Cohort), data = df.bl, control = lmerControl(optimizer = "Nelder_Mead")))
Age_at_onset <- lm.beta.lmer(lmer(disease_onset_age ~ BrainPAD + wb_vol_ratio_icv + poly(age_at_scan, 2) + gender + field_strength + (1|Cohort), data = df.bl, control = lmerControl(optimizer = "Nelder_Mead")))
round(as.data.frame(rbind(EDSS_baseline, Time_since_diagnosis, Age_at_onset)),3) %>%
  kable() %>%
  kable_styling(full_width = F, position = "left")
BrainPAD wb_vol_ratio_icv poly(age_at_scan, 2)1 poly(age_at_scan, 2)2 gendermale field_strength3T
EDSS_baseline 0.14 -0.14 0.26 0.10 0.00 -0.07
Time_since_diagnosis 0.24 -0.10 0.41 0.07 -0.04 -0.06
Age_at_onset -0.20 0.09 0.77 -0.06 0.03 0.04

DMT

Effects of disease modifying treatment on brain-PAD.

table(df.bl$DMT_YesNoNA, df.bl$subtype) %>%
  kable() %>%
  kable_styling(full_width = F, position = "left")
control CIS RRMS SPMS PPMS
NO 0 234 285 52 104
YES 0 60 356 51 8

Descriptive statistics brain-PAD by DMT status

df.bl %>%
  filter(control0rest1 == "MS") %>%
  group_by(DMT_YesNoNA) %>%
  dplyr::summarise(n = n(), mean = mean(BrainPAD), sd = sd(BrainPAD), min = min(BrainPAD), max = max(BrainPAD)) %>%
  kable(digits = 2) %>%
  kable_styling(full_width = F, position = "left")
DMT_YesNoNA n mean sd min max
NO 675 8.2 9.1 -13.07 47
YES 475 14.2 10.7 -18.18 49
NA 54 14.8 9.9 -0.73 42

Fit LME model

fit_DMT <- lmer(BrainPAD ~ 
                  subtype +
                  poly(age_at_scan, 2) +
                  gender +
                  field_strength +
                  wb_vol_ratio_icv +
                  DMT_YesNoNA +
                  (1|Cohort),
                data = df.bl,
                control = lmerControl(optimizer = "Nelder_Mead"))
emmeans(object = fit_DMT, pairwise ~ DMT_YesNoNA)
## $emmeans
##  DMT_YesNoNA emmean   SE   df lower.CL upper.CL
##  NO            10.2 0.70 21.3      8.8     11.6
##  YES           12.1 0.75 27.6     10.6     13.7
## 
## Results are averaged over the levels of: subtype, gender, field_strength 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate   SE   df t.ratio p.value
##  NO - YES    -1.91 0.49 1072 -3.900  0.0001 
## 
## Results are averaged over the levels of: subtype, gender, field_strength 
## Degrees-of-freedom method: kenward-roger

EDSS progression survival analysis

Based on Arman Eshaghi’s code used in Eshaghi et al., 2018 Annals of Neurology. Function for characterising EDSS progression, based on different rates of change and different baseline EDSS values

is_sustained_progression <- function(edssAtStart, change){
  sustainedProgression <- FALSE
  #if start of edss is 0, 1.5 increase is considered sustained progression
  if ((edssAtStart < 1) & (change >= 1.5)) {
    sustainedProgression <- TRUE
  }
  #if start of edss is 6 or above, 0.5 increase is considered sustained progression
  else if ((edssAtStart >= 6) & (change >= 0.5 )) {
    sustainedProgression <- TRUE
  }
  #if start of edss is more than zero but less than 6, sustained progression is by 1 increase in edss
  else if ((edssAtStart >= 1 ) & (edssAtStart < 6 )  & (change >= 1  )) {
    sustainedProgression <- TRUE
  }
  return(sustainedProgression)
}

Determine change in EDSS from baseline to last follow-up

Select latest EDSS per subject in subjects with 2 or more assessments

y1 <- df %>%
  filter(!subtype == "control")  %>%
  filter(!is.na(EDSSbaseline)) %>%
  group_by(PatientID) %>%
  top_n(1, interval) %>%
  ungroup() %>%
  dplyr::rename(latest_EDSS = EDSSatScan) %>%
  dplyr::select(PatientID, interval, EDSSbaseline, latest_EDSS) %>%
  filter(!is.na(latest_EDSS)) %>%
  mutate(EDSSchange = latest_EDSS - EDSSbaseline)

# apply Arman's function
y1$EDSS_progression <-  mapply(is_sustained_progression, y1$EDSSbaseline, y1$EDSSchange) 

## get baseline brain-PAD and brain volumetric measures
y2 <- df %>%
  filter(!subtype == "control")  %>%
  filter(interval == 0) %>%
  filter(!is.na(EDSSbaseline)) %>%
  dplyr::rename(BrainPAD_baseline = BrainPAD) %>%
  dplyr::rename(GM_vol_baseline = GM_vol) %>%
  dplyr::rename(WM_vol_baseline = WM_vol) %>%
  dplyr::rename(CSF_vol_baseline = CSF_vol) %>%
  dplyr::rename(WBV_baseline = WBV) %>%
  dplyr::rename(ICV_baseline = ICV) %>%
  dplyr::select(-one_of('interval'))

y3 <- right_join(y1, y2, by = c("PatientID")) %>%
  filter(!is.na(latest_EDSS))

Numbers of EDSS progressors

The number of MS patients with >= 2 EDSS scores was 1143.

table(y3$EDSS_progression) # calculate proportion of patients who progress
## 
## FALSE  TRUE 
##   840   303
round(prop.table(table(y3$EDSS_progression)),3) # calculate percentage of patients who progress
## 
## FALSE  TRUE 
##  0.73  0.27

Run survival analysis

Using brain-PAD with normal covariates

# creating new response function
S <- Surv(time = y3$interval, event = y3$EDSS_progression)
# Brain-PAD, age, sex model
surv.model <- coxph(S ~ BrainPAD_baseline + poly(age_at_baseline_scan1,2) + gender + field_strength, data = y3)
summary(surv.model)
## Call:
## coxph(formula = S ~ BrainPAD_baseline + poly(age_at_baseline_scan1, 
##     2) + gender + field_strength, data = y3)
## 
##   n= 1143, number of events= 303 
## 
##                                     coef exp(coef) se(coef)    z Pr(>|z|)
## BrainPAD_baseline               2.30e-02  1.02e+00 5.80e-03 3.97  7.3e-05
## poly(age_at_baseline_scan1, 2)1 1.11e+01  6.33e+04 2.12e+00 5.22  1.8e-07
## poly(age_at_baseline_scan1, 2)2 8.87e-01  2.43e+00 2.07e+00 0.43    0.669
## gendermale                      2.32e-01  1.26e+00 1.20e-01 1.93    0.054
## field_strength3T                3.47e-01  1.42e+00 1.70e-01 2.04    0.042
##                                    
## BrainPAD_baseline               ***
## poly(age_at_baseline_scan1, 2)1 ***
## poly(age_at_baseline_scan1, 2)2    
## gendermale                      .  
## field_strength3T                *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                 exp(coef) exp(-coef) lower .95 upper .95
## BrainPAD_baseline                    1.02   9.77e-01    1.0117  1.03e+00
## poly(age_at_baseline_scan1, 2)1  63298.14   1.58e-05  998.7908  4.01e+06
## poly(age_at_baseline_scan1, 2)2      2.43   4.12e-01    0.0418  1.41e+02
## gendermale                           1.26   7.93e-01    0.9961  1.60e+00
## field_strength3T                     1.42   7.07e-01    1.0132  1.98e+00
## 
## Concordance= 0.648  (se = 0.017 )
## Rsquare= 0.051   (max possible= 0.953 )
## Likelihood ratio test= 60.2  on 5 df,   p=1e-11
## Wald test            = 62.8  on 5 df,   p=3e-12
## Score (logrank) test = 65.5  on 5 df,   p=9e-13
# Check the assumptions of proportional hazards are met
cox.zph(surv.model)
##                                       rho    chisq        p
## BrainPAD_baseline                0.145349 7.54e+00 0.006035
## poly(age_at_baseline_scan1, 2)1 -0.068122 1.44e+00 0.229532
## poly(age_at_baseline_scan1, 2)2  0.166316 7.78e+00 0.005279
## gendermale                      -0.000737 1.67e-04 0.989698
## field_strength3T                -0.163475 9.14e+00 0.002502
## GLOBAL                                 NA 2.22e+01 0.000485

Check that the brain-PAD line is horizontal.

plot(cox.zph(surv.model)[1])

The hazard ratio for brain-PAD on time-to-disease-progression was HR (95% CI) = 1.02, 1.01, 1.03. That means for every additional +1 year of brain-PAD there is a 1.02% increase in the likelihood of EDSS progression. Extrapolated over 5 years of brain-PAD, there is a 1.12 increase in the likelihood of EDSS progression.

Adding normalised brain volume as a covariate

# creating new response function
S <- Surv(time = y3$interval, event = y3$EDSS_progression)
# Brain-PAD, age, sex model
surv.model <- coxph(S ~ BrainPAD_baseline + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv, data = y3)
summary(surv.model)
## Call:
## coxph(formula = S ~ BrainPAD_baseline + poly(age_at_baseline_scan1, 
##     2) + gender + field_strength + wb_vol_ratio_icv, data = y3)
## 
##   n= 1143, number of events= 303 
## 
##                                      coef exp(coef)  se(coef)     z
## BrainPAD_baseline               -4.81e-03  9.95e-01  7.83e-03 -0.61
## poly(age_at_baseline_scan1, 2)1  2.10e+00  8.14e+00  2.71e+00  0.77
## poly(age_at_baseline_scan1, 2)2 -2.02e-01  8.17e-01  2.09e+00 -0.10
## gendermale                       4.87e-02  1.05e+00  1.25e-01  0.39
## field_strength3T                 3.86e-01  1.47e+00  1.71e-01  2.25
## wb_vol_ratio_icv                -9.42e+00  8.14e-05  1.73e+00 -5.45
##                                 Pr(>|z|)    
## BrainPAD_baseline                  0.539    
## poly(age_at_baseline_scan1, 2)1    0.438    
## poly(age_at_baseline_scan1, 2)2    0.923    
## gendermale                         0.697    
## field_strength3T                   0.024 *  
## wb_vol_ratio_icv                 5.1e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                 exp(coef) exp(-coef) lower .95 upper .95
## BrainPAD_baseline                9.95e-01   1.00e+00  9.80e-01  1.01e+00
## poly(age_at_baseline_scan1, 2)1  8.14e+00   1.23e-01  4.04e-02  1.64e+03
## poly(age_at_baseline_scan1, 2)2  8.17e-01   1.22e+00  1.37e-02  4.88e+01
## gendermale                       1.05e+00   9.52e-01  8.22e-01  1.34e+00
## field_strength3T                 1.47e+00   6.80e-01  1.05e+00  2.06e+00
## wb_vol_ratio_icv                 8.14e-05   1.23e+04  2.75e-06  2.41e-03
## 
## Concordance= 0.661  (se = 0.018 )
## Rsquare= 0.074   (max possible= 0.953 )
## Likelihood ratio test= 87.9  on 6 df,   p=<2e-16
## Wald test            = 96.8  on 6 df,   p=<2e-16
## Score (logrank) test = 98.8  on 6 df,   p=<2e-16
# Check the assumptions of proportional hazards are met
cox.zph(surv.model)
##                                     rho  chisq        p
## BrainPAD_baseline                0.0804  2.224 0.135892
## poly(age_at_baseline_scan1, 2)1 -0.0893  2.830 0.092543
## poly(age_at_baseline_scan1, 2)2  0.1581  7.318 0.006827
## gendermale                      -0.0284  0.268 0.604561
## field_strength3T                -0.1616  8.835 0.002955
## wb_vol_ratio_icv                -0.0612  1.328 0.249135
## GLOBAL                               NA 24.602 0.000405

Time-to-EDSS progression Kaplan-Meier plots

Run survplot on survival object

Based on a median split of brain-PAD.

km.plot.df <- y3 %>% dplyr::mutate(split_BrainPAD = ntile(BrainPAD_baseline, 2))
S <- Surv(time = km.plot.df$interval, event = km.plot.df$EDSS_progression) # response function
sv.object <- survfit(S ~ split_BrainPAD, data = km.plot.df)
# survplot <- ggsurvplot(sv.object, 
#                        ggtheme = theme_cowplot(font_size = 5, line_size = 0.2, rel_large = 1),
#                        risk.table = F,
#                        risk.table.fontsize = 1.2,
#                        cumcensor = F,
#                        conf.int = T,
#                        palette = c("blue", "red2"),
#                        linetype = c(1,3),
#                        size = 0.5,
#                        censor = F,
#                        xlab = "Time (years)",
#                        legend = c(0.7,0.85),
#                        legend.labs = c("Brain-PAD<median", "Brain-PAD>median"))
# survplot$plot <- survplot$plot + theme(legend.key.size = unit(1, "line"))
# survplot
# # ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/KM_brain-PAD_plot.pdf", height = 8, width = 8, print(survplot, newpage = FALSE), useDingbats = FALSE)
# ggsave(filename = "~/Work/Articles/Brain age/MS/figures/Fig5_KM_brain-PAD.tif", height = 50, width = 80, print(survplot, newpage = FALSE), device = "tiff", dpi = 300, units = "mm")

The median value = 9.68 years.

Longitudinal brain-age analysis

The total number of people with two or more scans was n = 1266. This included n = 1155 MS patients and n = 111 controls.

With 3 or more scans, there were n = 509 MS patients and n = 64 controls.

Determine change in brain-PAD from baseline to last follow-up

## select latest brain-PAD per subject in subjects with 2 or more assessments
z1 <- df %>%
  filter(NoScans >= 2)  %>%
  group_by(PatientID) %>%
  top_n(1, interval) %>%
  ungroup() %>%
  dplyr::rename(latest_BrainPAD = BrainPAD) %>%
  dplyr::rename(latest_wb_vol_ratio_icv = wb_vol_ratio_icv) %>%
  dplyr::select(PatientID, interval, latest_BrainPAD, latest_wb_vol_ratio_icv)

## baseline brain-PAD
z2 <- df %>%
  filter(NoScans >= 2)  %>%
  filter(interval == 0) %>%
  filter(!is.na(BrainPAD)) %>%
  dplyr::rename(BrainPAD_baseline = BrainPAD) %>%
  #dplyr::rename(GM_vol_baseline = GM_vol) %>%
  #dplyr::rename(WM_vol_baseline = WM_vol) %>%
  dplyr::rename(wb_vol_baseline = wb_vol_ratio_icv) %>%
  dplyr::select(-one_of('interval'))

## calculate change in brain-PAD between baseline and latest brain-PAD
z3 <- right_join(z1, z2, by = c("PatientID")) %>%
  mutate(BrainPAD_change = latest_BrainPAD - BrainPAD_baseline) %>%
  mutate(wb_vol_ratio_icv_change = latest_wb_vol_ratio_icv - wb_vol_baseline)

Mean annualised rates of change in brain-PAD per group

z3 %>%
  dplyr::group_by(subtype) %>%
  dplyr::summarise(n = n(), mean = mean(BrainPAD_change/interval, na.rm = T), SD = sd(BrainPAD_change/interval, na.rm = T), median = median(BrainPAD_change/interval, na.rm = T), min = min(BrainPAD_change/interval, na.rm = T), max = max(BrainPAD_change/interval, na.rm = T)) %>%
  kable(digits = 2) %>%
  kable_styling(full_width = F, position = "left")
subtype n mean SD median min max
control 111 0.03 2.0 -0.09 -4.7 9.9
CIS 279 0.23 2.2 0.12 -11.7 11.5
RRMS 653 0.29 1.7 0.17 -12.2 16.2
SPMS 104 -0.29 1.6 -0.24 -6.1 7.0
PPMS 119 0.40 1.5 0.30 -4.7 4.2

Determine change in EDSS from baseline to last follow-up

## select latest EDSS per subject in subjects with 2 or more assessments
a1 <- df %>%
  filter(NoScans >= 2)  %>%
  group_by(PatientID) %>%
  top_n(1, interval) %>%
  ungroup() %>%
  dplyr::rename(latest_EDSSatScan = EDSSatScan) %>%
  dplyr::select(PatientID, interval, latest_EDSSatScan) 

## baseline EDSS
a2 <- df %>%
  filter(NoScans >= 2)  %>%
  filter(interval == 0) %>%
  filter(!is.na(EDSSatScan)) %>%
  dplyr::rename(EDSSatScan_baseline = EDSSatScan) %>%
  dplyr::rename(BrainPAD_baseline = BrainPAD) %>%
  #dplyr::rename(GM_vol_baseline = GM_vol) %>%
  #dplyr::rename(WM_vol_baseline = WM_vol) %>%
  dplyr::rename(WB_vol_baseline = wb_vol_ratio_icv) %>%
  dplyr::select(-one_of('interval'))

## calculate change in brain-PAD between baseline and latest brain-PAD
a3 <- right_join(a1, a2, by = c("PatientID")) %>%
  mutate(EDSS_change = latest_EDSSatScan - EDSSatScan_baseline)

Mean annualised rates of change in EDSS per group

a3 %>%
  dplyr::group_by(subtype) %>%
  dplyr::summarise(n = n(), mean = mean(EDSS_change/interval, na.rm = T), SD = sd(EDSS_change/interval, na.rm = T), median = median(EDSS_change/interval, na.rm = T), min = min(EDSS_change/interval, na.rm = T), max = max(EDSS_change/interval, na.rm = T)) %>%
  kable(digits = 2) %>%
  kable_styling(full_width = F, position = "left")
subtype n mean SD median min max
CIS 257 -0.26 1.05 0.00 -6.86 3.0
RRMS 649 0.12 0.45 0.00 -2.24 3.3
SPMS 104 0.14 0.29 0.00 -0.64 1.3
PPMS 119 0.36 0.63 0.17 -1.01 3.0

Relationship between annualised EDSS change and brain-PAD change

The total number of patients with two or more scans was n = 1155.

delta.df <- join(a3, z3)
with(delta.df, cor.test(EDSS_change, BrainPAD_change, method = "pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  EDSS_change and BrainPAD_change
## t = 9, df = 1000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.21 0.32
## sample estimates:
##  cor 
## 0.26
with(delta.df[complete.cases(delta.df),], pcor.test(EDSS_change, BrainPAD_change, wb_vol_ratio_icv_change, method = "pearson"))
##   estimate p.value statistic   n gp  Method
## 1    0.081   0.011       2.5 975  1 pearson

Historgams of baseline EDSS scores and EDSS changes

plot1 <- ggplot(df.bl, aes(x = EDSSbaseline)) + geom_histogram(binwidth = 1, fill = "dodgerblue", colour = "black") + xlab("EDSS at baseline") + theme_cowplot()
plot2 <- ggplot(delta.df, aes(x = EDSS_change)) + geom_histogram(binwidth = 1, fill = "dodgerblue", colour = "black") + xlab("EDSS change") + theme_cowplot()
plot3 <- ggplot(subset(df.bl, df.bl$control0rest1 == "MS"), aes(x = BrainPAD)) + geom_histogram(binwidth = 1, fill = "darkgoldenrod2", colour = "black") + xlab("Brain-PAD at baseline") + theme_cowplot()
plot4 <- ggplot(subset(delta.df, delta.df$control0rest1 == "MS"), aes(x = BrainPAD_change)) + geom_histogram(binwidth = 1, fill = "darkgoldenrod2", colour = "black") + xlab("Brain-PAD change") + theme_cowplot()
plot5 <- ggplot(delta.df, aes(x = EDSS_change, y = BrainPAD_change)) + geom_point(aes(colour = subtype)) + geom_smooth(method = "lm", colour = "black") + labs(x = "EDSS change", y = "Brain-PAD change") + theme_cowplot() + scale_color_manual(values = ms.palette[-1], name = "MS subtype")

pg <- cowplot::plot_grid(cowplot::plot_grid(plot1, plot2, plot3, plot4, labels = "AUTO", ncol = 2), plot5, ncol = 2, labels = c("", "E"), rel_widths = c(2,1.5))
pg

cowplot::save_plot("~/Work/Brain ageing/Collaborations/MS/plots/histograms_change_EDSS_brain-PAD_plot.pdf", pg, base_asp = 2.5)

Interaction between subtype and EDSS change

fit.change <- lm(BrainPAD_change ~ EDSS_change * subtype, data = delta.df)
anova(fit.change)
## Analysis of Variance Table
## 
## Response: BrainPAD_change
##                       Df Sum Sq Mean Sq F value Pr(>F)    
## EDSS_change            1   1452    1452   82.48 <2e-16 ***
## subtype                3    212      71    4.01 0.0074 ** 
## EDSS_change:subtype    3    206      69    3.90 0.0087 ** 
## Residuals           1089  19174      18                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Covarying for change in normalised brain volumes

fit.change2 <- lm(BrainPAD_change ~ EDSS_change * subtype + wb_vol_ratio_icv_change, data = delta.df)
anova(fit.change2)
## Analysis of Variance Table
## 
## Response: BrainPAD_change
##                           Df Sum Sq Mean Sq F value  Pr(>F)    
## EDSS_change                1   1452    1452  117.03 < 2e-16 ***
## subtype                    3    212      71    5.70 0.00072 ***
## wb_vol_ratio_icv_change    1   5769    5769  464.99 < 2e-16 ***
## EDSS_change:subtype        3    111      37    2.98 0.03075 *  
## Residuals               1088  13500      12                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Use jtools package to get slopes from the model, per subtype.

sim_slopes(fit.change, pred = "EDSS_change", modx = "subtype", johnson_neyman = F, digits = 4)
## SIMPLE SLOPES ANALYSIS 
## 
## Slope of EDSS_change when subtype = CIS: 
##    Est.   S.E. t val.      p
##  0.8434 0.2189 3.8524 0.0001
## 
## Slope of EDSS_change when subtype = RRMS: 
##    Est.   S.E. t val.      p
##  1.2534 0.1459 8.5883 0.0000
## 
## Slope of EDSS_change when subtype = SPMS: 
##     Est.   S.E.  t val.      p
##  -0.6953 0.6510 -1.0680 0.2857
## 
## Slope of EDSS_change when subtype = PPMS: 
##    Est.   S.E. t val.      p
##  0.5882 0.3464 1.6983 0.0897
interact_plot(fit.change, pred = "EDSS_change", modx = "subtype", plot.points = T, interval = T, facet.modx = T, point.size = 0.6, line.thickness = 0.5,
              x.label = "EDSS annualised change", y.label = "Brain-PAD annualised change", modx.labels = c("CIS", "RRMS", "SPMS", "PPMS")) +
  geom_hline(yintercept = 0, lty = 2) + theme_cowplot(font_size = 9)  +
  scale_fill_manual(values = ms.palette[-1], name = "MS subtype") +
  scale_color_manual(values = ms.palette[-1], name = "MS subtype")

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/change_EDSS_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)
ggsave(filename = "~/Work/Articles/Brain age/MS/figures/Fig3_change_EDSS_brain-PAD.tif", height = 60, width = 80, device = "tiff", units = "mm", dpi = 300)

Correlate baseline brain-PAD with the number of follow-up scans completed in the n=104 with >1 scan.

with(subset(df.bl, df.bl$NoScans > 1 & df.bl$subtype == "SPMS"), cor.test(BrainPAD, NoScans, method = "spearman"))
## Warning in cor.test.default(BrainPAD, NoScans, method = "spearman"): Cannot
## compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  BrainPAD and NoScans
## S = 2e+05, p-value = 0.003
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##   rho 
## -0.29

Longitudinal brain-predicted age trajectories

Interaction between group and time MS vs. controls

Conditional growth model - random effects of participant and cohort

model_int.group <- lmer(BrainPAD ~ control0rest1 * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = df, control = lmerControl(optimizer = "Nelder_Mead")) 
round(anova(model_int.group)["control0rest1:interval",],3)
## Type III Analysis of Variance Table with Satterthwaite's method
##                        Sum Sq Mean Sq NumDF DenDF F value Pr(>F)  
## control0rest1:interval   27.7    27.7     1  1326    5.84  0.016 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model_int.group)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## BrainPAD ~ control0rest1 * interval + poly(age_at_baseline_scan1,  
##     2) + gender + field_strength + (interval | PatientID) + (1 |  
##     Cohort)
##    Data: df
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
## REML criterion at convergence: 21373
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -9.741 -0.358 -0.014  0.345  8.603 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev. Corr 
##  PatientID (Intercept) 82.887   9.104         
##            interval     0.461   0.679    -0.06
##  Cohort    (Intercept) 15.207   3.900         
##  Residual               4.738   2.177         
## Number of obs: 3564, groups:  PatientID, 1354; Cohort, 15
## 
## Fixed effects:
##                                  Estimate Std. Error        df t value
## (Intercept)                        0.0513     1.4471   38.4207    0.04
## control0rest1MS                   12.1417     1.0279 1258.5379   11.81
## interval                          -0.1655     0.1496 1374.1462   -1.11
## poly(age_at_baseline_scan1, 2)1  -59.1972    16.3337 1335.7802   -3.62
## poly(age_at_baseline_scan1, 2)2  -33.7926    14.9329 1340.4326   -2.26
## gendermale                         1.9018     0.5263 1337.0517    3.61
## field_strength3T                  -3.2593     0.9971  347.0562   -3.27
## control0rest1MS:interval           0.3722     0.1539 1325.5026    2.42
##                                 Pr(>|t|)    
## (Intercept)                      0.97191    
## control0rest1MS                  < 2e-16 ***
## interval                         0.26873    
## poly(age_at_baseline_scan1, 2)1  0.00030 ***
## poly(age_at_baseline_scan1, 2)2  0.02380 *  
## gendermale                       0.00031 ***
## field_strength3T                 0.00119 ** 
## control0rest1MS:interval         0.01575 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cn01MS intrvl p(___1,2)1 p(___1,2)2 gndrml fld_3T
## cntrl0rs1MS -0.613                                                  
## interval    -0.066  0.093                                           
## pl(___1,2)1  0.072 -0.159 -0.001                                    
## pl(___1,2)2  0.026 -0.055  0.003  0.026                             
## gendermale  -0.147  0.027  0.003  0.027      0.059                  
## fld_strng3T -0.275  0.024  0.002  0.016     -0.027     -0.041       
## cntrl0r1MS:  0.063 -0.098 -0.972  0.001     -0.003     -0.003  0.000

Use sjPlots to visualise the interactions, using estimated marginal means

plot_model(model_int.group, type = "emm", terms = c("interval [all]", "control0rest1"), colors = c("darkgreen", "firebrick"), legend.title = "Group", grid = F, show.data = F) +
  theme_cowplot() +
  labs(x = "Interval from baseline scan (years)", y = "Brain-PAD (years)") +
  geom_hline(yintercept = 0, lty = 2)

Generate estimate marginal trends for healthy controls and for all MS/CIS combined, using annualised difference between baseline and final follow-up.

Interaction between group and time MS subtypes

Conditional growth model - random effects of participant and cohort

model_int.subtype <- lmer(BrainPAD ~ subtype * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = subset(df, df$subtype != "control"), control = lmerControl(optimizer = "Nelder_Mead")) 
round(anova(model_int.subtype)["subtype:interval",],3)
## Type III Analysis of Variance Table with Satterthwaite's method
##                  Sum Sq Mean Sq NumDF DenDF F value Pr(>F)   
## subtype:interval   78.2    26.1     3   846    4.99  0.002 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Generate EMMs for healthy controls and for MS subtypes, using annualised difference between baseline and final follow-up.

# emtrends(object = model_int.subtype, pairwise ~ subtype, var = "interval")
plot_model(model_int.subtype, type = "emm", terms = c("interval [all]", "subtype"),
           legend.title = "Group", show.data = F, grid = F, colors = "bw") +
  theme_cowplot() +
  labs(x = "Interval from baseline scan (years)", y = "Brain-PAD (years)") +
  geom_hline(yintercept = 0, lty = 2)

Longitudinal brain-PAD by interval plots

Randomly sub-sampling 50% of the data.

sub.sample.df <- df %>%
  filter(NoScans >= 2)  %>%
  group_by(PatientID) %>%
  sample_frac(0.5) %>%
  ungroup()

ggplot(data = sub.sample.df, aes(x = interval, y = BrainPAD, fill = subtype)) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_line(aes(group = PatientID, colour = subtype), alpha = 0.5, linetype = 1, size = 0.25) +
  geom_point(aes(colour = subtype), size = 0.5, alpha = 0.5) +
  labs(x = "Time (years)", y = "Brain-predicted age difference (years)") +
  scale_fill_manual(values = ms.palette) +
  scale_color_manual(values = ms.palette) +
  # facet_grid(Cohort) +
  theme_cowplot() + theme(legend.position = "none")
## Warning: Removed 1 rows containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/longitudinal_brain-PAD_time_plot.pdf", height = 6, width = 6, useDingbats = FALSE)
## Warning: Removed 1 rows containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).
ggplot(data = df, aes(x = age_at_scan, y = BrainPAD, fill = subtype)) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_line(aes(group = PatientID, colour = subtype), alpha = 0.6, linetype = 1, size = 0.2) +
  labs(x = "Time (years)", y = "Brain-predicted age difference (years)") +
  scale_fill_manual(values = ms.palette) +
  scale_color_manual(values = ms.palette) +
  facet_wrap(~ subtype, scales = "free_x") +
  theme_cowplot() + theme(legend.position = "none")
## Warning: Removed 1 rows containing missing values (geom_path).

Age at diagnosis and longitudinal brain-PAD

model_int.onset <- lmer(BrainPAD ~ disease_onset_age * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = subset(df, df$control0rest1 == "MS"), control = lmerControl(optimizer = "Nelder_Mead"))
## Warning: Some predictor variables are on very different scales: consider
## rescaling

## Warning: Some predictor variables are on very different scales: consider
## rescaling
anova(model_int.onset)
## Type III Analysis of Variance Table with Satterthwaite's method
##                                Sum Sq Mean Sq NumDF DenDF F value  Pr(>F)
## disease_onset_age                 937     937     1  1035  175.43 < 2e-16
## interval                           38      38     1   696    7.11  0.0079
## poly(age_at_baseline_scan1, 2)    347     173     2  1044   32.46 2.1e-14
## gender                             65      65     1  1127   12.18  0.0005
## field_strength                     26      26     1   179    4.87  0.0286
## disease_onset_age:interval          6       6     1   693    1.06  0.3042
##                                   
## disease_onset_age              ***
## interval                       ** 
## poly(age_at_baseline_scan1, 2) ***
## gender                         ***
## field_strength                 *  
## disease_onset_age:interval        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Time since diagnosis and longitudinal brain-PAD

model_int.duration <- lmer(BrainPAD ~ disease_duration * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = subset(df, df$control0rest1 == "MS"), control = lmerControl(optimizer = "Nelder_Mead"))
## Warning: Some predictor variables are on very different scales: consider
## rescaling

## Warning: Some predictor variables are on very different scales: consider
## rescaling
anova(model_int.duration)
## Type III Analysis of Variance Table with Satterthwaite's method
##                                Sum Sq Mean Sq NumDF DenDF F value  Pr(>F)
## disease_duration                  934     934     1  1014  179.95 < 2e-16
## interval                          332     332     1   526   63.90 8.3e-15
## poly(age_at_baseline_scan1, 2)    462     231     2  1076   44.56 < 2e-16
## gender                             62      62     1  1085   11.87 0.00059
## field_strength                     23      23     1   172    4.52 0.03500
## disease_duration:interval         173     173     1   748   33.27 1.2e-08
##                                   
## disease_duration               ***
## interval                       ***
## poly(age_at_baseline_scan1, 2) ***
## gender                         ***
## field_strength                 *  
## disease_duration:interval      ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
onset.plt <- plot_model(model_int.onset, type = "pred", terms = c("interval [all]", "disease_onset_age[20,30,40,50,60]"), legend.title = "Onset age (years)", show.data = F, grid = F, colors = "bw") +
  geom_hline(yintercept = 0, lty = 2, lwd = 0.5) +
  labs(x = "Time since baseline (years)", y = "Brain-PAD (years)") +
  theme_cowplot(font_size = 8) +
  theme(legend.key.size = unit(1.5, "line"))
duration.plt <- plot_model(model_int.duration, type = "pred", terms = c("interval [all]", "disease_duration[0,7.5,15]"), legend.title = "Years since diagnosis", show.data = F, grid = F, colors = "bw") +
  geom_hline(yintercept = 0, lty = 2, lwd = 0.5) +
  labs(x = "Time since baseline (years)", y = "Brain-PAD (years)") +
  theme_cowplot(font_size = 8) +
  theme(legend.key.size = unit(1.5, "line"))
cowplot::plot_grid(onset.plt, duration.plt, nrow = 2)

ggsave(filename = "~/Work/Articles/Brain age/MS/figures/Fig4_time_onset.tif", height = 120, width = 80, device = "tiff", units = "mm", dpi = 300)

Plot(s) with age and estimated age for each individual from each group and site

ggplot(data = df, aes(x = age_at_scan, y = BrainAge, fill = subtype)) +
  geom_abline(slope = 1, lty = 2) +
  geom_line(aes(group = PatientID, colour = subtype), alpha = 0.5, linetype = 1, size = 0.25) +
  geom_point(aes(colour = subtype), size = 0.5, alpha = 0.5) +
  labs(x = "Age (years)", y = "Brain-predicted age (years)") +
  scale_fill_manual(values = ms.palette) +
  scale_color_manual(values = ms.palette) +
  facet_wrap(~ Cohort) +
  theme_cowplot() + theme(legend.position = "none")

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/longitudinal_brain-age_years_plot.pdf", height = 6, width = 10, useDingbats = FALSE)